diff --git a/.gitignore b/.gitignore index f0f409c246fa6e8fa3e4c862959e184eb3556108..291a3dd611addcc307a74a444814a32d914982e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ /target *.dot *.bc +*.out +*.ll +*.c +*.o diff --git a/Cargo.lock b/Cargo.lock index 2dc1fecb35270afe9fc1a62f0c90eece5b283b08..bfcc04d022442a77417a1dbf3fd1075f51c8d9e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,6 +191,15 @@ dependencies = [ "ordered-float", ] +[[package]] +name = "hercules_matmul" +version = "0.1.0" +dependencies = [ + "clap", + "hercules_rt", + "rand", +] + [[package]] name = "hercules_opt" version = "0.1.0" @@ -198,6 +207,13 @@ dependencies = [ "hercules_ir", ] +[[package]] +name = "hercules_rt" +version = "0.1.0" +dependencies = [ + "libc", +] + [[package]] name = "libc" version = "0.2.153" diff --git a/Cargo.toml b/Cargo.toml index d6a8629e19126222a539896445cab51e9792716c..8320ec1520c225375638586a3b7c12e5ab4501ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,10 @@ members = [ "hercules_cg", "hercules_ir", "hercules_opt", + "hercules_rt", + "hercules_tools/hercules_dot", - "hercules_tools/hercules_cpu" + "hercules_tools/hercules_cpu", + + "hercules_samples/matmul" ] diff --git a/hercules_cg/src/cpu_beta.rs b/hercules_cg/src/cpu_beta.rs index bb73e1d348b31ccbe1b3e71fd1852364da3d08ce..e6c858fd6b5b9574c5e3ea36d63684bf21754567 100644 --- a/hercules_cg/src/cpu_beta.rs +++ b/hercules_cg/src/cpu_beta.rs @@ -165,7 +165,11 @@ pub fn cpu_beta_codegen<W: Write>( } } - // Step 4: do codegen for each function. + // Step 4: generate dummy uninitialized global - this is needed so that + // there'll be a non-empty .bss section in the ELF object file. + write!(w, "@dummy = dso_local global i32 0, align 4\n")?; + + // Step 5: do codegen for each function. for function_idx in 0..functions.len() { let function = &functions[function_idx]; let typing = &typing[function_idx]; @@ -176,7 +180,7 @@ pub fn cpu_beta_codegen<W: Write>( let fork_join_map = &fork_join_maps[function_idx]; let fork_join_nest = &fork_join_nests[function_idx]; - // Step 4.1: emit function signature. + // Step 5.1: emit function signature. let llvm_ret_type = &llvm_types[function.return_type.idx()]; let mut llvm_params = function .param_types @@ -198,7 +202,7 @@ pub fn cpu_beta_codegen<W: Write>( } write!(w, ") {{\n")?; - // Step 4.2: emit basic blocks. A node represents a basic block if its + // Step 5.2: emit basic blocks. A node represents a basic block if its // entry in the basic blocks vector points to itself. Each basic block // is created as four strings: the block header, the block's phis, the // block's data computations, and the block's terminator instruction. @@ -217,7 +221,7 @@ pub fn cpu_beta_codegen<W: Write>( } } - // Step 4.3: emit nodes. Nodes are emitted into basic blocks separately + // Step 5.3: emit nodes. Nodes are emitted into basic blocks separately // as nodes are not necessarily emitted in order. Assemble worklist of // nodes, starting as reverse post order of nodes. For non-phi and non- // reduce nodes, only emit once all data uses are emitted. In addition, @@ -261,7 +265,7 @@ pub fn cpu_beta_codegen<W: Write>( } } - // Step 4.4: put basic blocks in order. + // Step 5.4: put basic blocks in order. for node in reverse_postorder { if bb[node.idx()] == *node { write!( @@ -275,7 +279,7 @@ pub fn cpu_beta_codegen<W: Write>( } } - // Step 4.5: close function. + // Step 5.5: close function. write!(w, "}}\n")?; } diff --git a/hercules_rt/Cargo.toml b/hercules_rt/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..500265c40d3821a0077b16ed65d2dcff9a5075d4 --- /dev/null +++ b/hercules_rt/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "hercules_rt" +version = "0.1.0" +authors = ["Russel Arbore <rarbore2@illinois.edu>"] + +[dependencies] +libc = "*" diff --git a/hercules_rt/src/elf.rs b/hercules_rt/src/elf.rs new file mode 100644 index 0000000000000000000000000000000000000000..9fc9dc3b60ba34b5646f28785d439d9f1a0d5ee7 --- /dev/null +++ b/hercules_rt/src/elf.rs @@ -0,0 +1,211 @@ +extern crate libc; + +use std::ffi::CStr; +use std::mem::size_of; +use std::ptr::copy_nonoverlapping; +use std::ptr::null_mut; +use std::ptr::read_unaligned; + +use self::libc::*; + +/* + * The libc crate doesn't have everything from elf.h, so these things need to be + * manually defined. + */ + +#[repr(C)] +#[derive(Debug)] +struct Elf64_Rela { + r_offset: Elf64_Addr, + r_info: Elf64_Xword, + r_addend: Elf64_Sxword, +} + +const R_X86_64_PC32: u64 = 2; +const R_X86_64_PLT32: u64 = 4; +const STT_FUNC: u8 = 2; + +/* + * Holds a mmaped copy of .text + .bss for direct execution, plus metadata for + * each function. The .bss section holds a table storing addresses to internal + * runtime functions, since this is literally easier than patching the object + * code to directly jump to those runtime functions. + */ +#[derive(Debug)] +pub(crate) struct Elf { + pub(crate) function_names: Vec<String>, + pub(crate) function_pointers: Vec<isize>, + pub(crate) program_section: *mut u8, + pub(crate) program_size: usize, +} + +impl Drop for Elf { + fn drop(&mut self) { + unsafe { munmap(self.program_section as *mut _, self.program_size) }; + } +} + +/* + * Function for parsing our internal memory representation of an ELF file from + * the raw bytes of an ELF file. This includes creating a executable section of + * code, and relocating function calls and global variables. This whole thing is + * very unsafe, and is predicated on the elf parameter referencing properly + * formatted bytes. + */ +pub(crate) unsafe fn parse_elf(elf: &[u8]) -> Elf { + fn page_align(n: usize) -> usize { + (n + (4096 - 1)) & !(4096 - 1) + } + + // read_unaligned corresponds to memcpys in C - we need to memcpy structs + // out of the file's bytes, since they may be stored without proper + // alignment. + let header: Elf64_Ehdr = read_unaligned(elf.as_ptr() as *const _); + assert!(header.e_shentsize as usize == size_of::<Elf64_Shdr>()); + let section_header_table: Box<[_]> = (0..header.e_shnum) + .map(|idx| { + read_unaligned( + (elf.as_ptr().offset(header.e_shoff as isize) as *const Elf64_Shdr) + .offset(idx as isize), + ) + }) + .collect(); + + // Look for the .symtab, .strtab, .text, .bss, and .rela.text sections. Only + // the .rela.text section is not necessary. + let mut symtab_ndx = -1; + let mut strtab_ndx = -1; + let mut text_ndx = -1; + let mut bss_ndx = -1; + let mut rela_text_ndx = -1; + let shstrtab = &elf[section_header_table[header.e_shstrndx as usize].sh_offset as usize..]; + for i in 0..header.e_shnum as usize { + let section_name = &shstrtab[section_header_table[i].sh_name as usize..]; + if section_name.starts_with(b".symtab") { + symtab_ndx = i as i32; + } else if section_name.starts_with(b".strtab") { + strtab_ndx = i as i32; + } else if section_name.starts_with(b".text") { + text_ndx = i as i32; + } else if section_name.starts_with(b".bss") { + bss_ndx = i as i32; + } else if section_name.starts_with(b".rela.text") { + rela_text_ndx = i as i32; + } + } + assert!(symtab_ndx != -1); + assert!(strtab_ndx != -1); + assert!(text_ndx != -1); + assert!(bss_ndx != -1); + + // Get the headers for the required sections. + let symtab_hdr = section_header_table[symtab_ndx as usize]; + let strtab_hdr = section_header_table[strtab_ndx as usize]; + let text_hdr = section_header_table[text_ndx as usize]; + let bss_hdr = section_header_table[bss_ndx as usize]; + + // Collect the symbols in the symbol table. + assert!(symtab_hdr.sh_entsize as usize == size_of::<Elf64_Sym>()); + let num_symbols = symtab_hdr.sh_size as usize / size_of::<Elf64_Sym>(); + let symbol_table: Box<[_]> = (0..num_symbols) + .map(|idx| { + read_unaligned( + (elf.as_ptr().offset(symtab_hdr.sh_offset as isize) as *const Elf64_Sym) + .offset(idx as isize), + ) + }) + .collect(); + + // The mmaped region includes both the .text and .bss sections. + let program_size = page_align(text_hdr.sh_size as usize) + page_align(bss_hdr.sh_size as usize); + let program_base = mmap( + null_mut(), + program_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0, + ) as *mut u8; + let text_base = program_base; + let bss_base = text_base.offset(page_align(text_hdr.sh_size as usize) as isize); + + // Copy the object code into the mmaped region. + copy_nonoverlapping( + elf.as_ptr().offset(text_hdr.sh_offset as isize), + text_base, + text_hdr.sh_size as usize, + ); + + // If there are relocations, we process them here. + if rela_text_ndx != -1 { + let rela_text_hdr = section_header_table[rela_text_ndx as usize]; + let num_relocations = rela_text_hdr.sh_size / rela_text_hdr.sh_entsize; + + // We only iterate the relocations in order, so no need to collect. + let relocations = (0..num_relocations).map(|idx| { + read_unaligned( + (elf.as_ptr().offset(rela_text_hdr.sh_offset as isize) as *const Elf64_Rela) + .offset(idx as isize), + ) + }); + for relocation in relocations { + let symbol_idx = relocation.r_info >> 32; + let ty = relocation.r_info & 0xFFFFFFFF; + let patch_offset = text_base.offset(relocation.r_offset as isize); + + // We support PLT32 relocations only in the .text section, and PC32 + // relocations only in the .bss section. + match ty { + R_X86_64_PLT32 => { + let symbol_address = + text_base.offset(symbol_table[symbol_idx as usize].st_value as isize); + let patch = symbol_address + .offset(relocation.r_addend as isize) + .offset_from(patch_offset); + (patch_offset as *mut u32).write_unaligned(patch as u32); + } + R_X86_64_PC32 => { + let symbol_address = + bss_base.offset(symbol_table[symbol_idx as usize].st_value as isize); + let patch = symbol_address + .offset(relocation.r_addend as isize) + .offset_from(patch_offset); + (patch_offset as *mut u32).write_unaligned(patch as u32); + } + _ => panic!("ERROR: Unrecognized relocation type: {}.", ty), + } + } + } + + // Make the .text section readable and executable. The .bss section should + // still be readable and writable. + mprotect( + text_base as *mut c_void, + page_align(text_hdr.sh_size as usize), + PROT_READ | PROT_EXEC, + ); + + // Construct the final in-memory ELF representation. Look up the names of + // function symbols in the string table. + let strtab = &elf[strtab_hdr.sh_offset as usize..]; + let mut elf = Elf { + function_names: vec![], + function_pointers: vec![], + program_section: program_base, + program_size, + }; + for i in 0..num_symbols { + if symbol_table[i].st_info & 0xF == STT_FUNC { + let function_name_base = &strtab[symbol_table[i].st_name as usize..]; + let function_name = CStr::from_ptr(function_name_base.as_ptr() as *const _) + .to_str() + .unwrap() + .to_owned(); + elf.function_names.push(function_name); + elf.function_pointers + .push(symbol_table[i].st_value as isize); + } + } + + elf +} diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..f04c6c6aed58926330e1dc87614b90a51c962883 --- /dev/null +++ b/hercules_rt/src/lib.rs @@ -0,0 +1,50 @@ +use std::fs::File; +use std::io::prelude::*; +use std::path::Path; + +pub(crate) mod elf; +pub(crate) use crate::elf::*; + +#[derive(Debug)] +pub struct Module { + elf: Elf, +} + +impl Module { + pub fn get_function_ptr(&self, name: &str) -> *mut u8 { + unsafe { + self.elf.program_section.offset( + self.elf.function_pointers[self + .elf + .function_names + .iter() + .position(|s| s == name) + .unwrap()], + ) + } + } +} + +pub fn load_binary(path: &Path) -> Module { + let mut f = File::open(path).unwrap(); + let mut buffer = vec![]; + f.read_to_end(&mut buffer).unwrap(); + let elf = unsafe { parse_elf(buffer.as_slice()) }; + Module { elf } +} + +/* + * An ugly, unchecked macro for looking up Hercules functions in a module. Curse + * Rust for not supporting variadic generics and type pattern matching :shrug:. + * TODO: Generate per-lookup struct type for checking that the provided types + * are correct. + */ +#[macro_export] +macro_rules! lookup_function { + ($module:expr, $function:expr, $($param_ty:ty),*, => $ret_ty:ty) => { + { + let fn_ptr: fn($($param_ty),*) -> $ret_ty = unsafe { std::mem::transmute($module.get_function_ptr($function)) }; + fn_ptr + } + }; +} diff --git a/samples/ccp_example.hir b/hercules_samples/ccp_example.hir similarity index 100% rename from samples/ccp_example.hir rename to hercules_samples/ccp_example.hir diff --git a/samples/fork_join.hir b/hercules_samples/fork_join.hir similarity index 100% rename from samples/fork_join.hir rename to hercules_samples/fork_join.hir diff --git a/samples/gvn_example.hir b/hercules_samples/gvn_example.hir similarity index 100% rename from samples/gvn_example.hir rename to hercules_samples/gvn_example.hir diff --git a/samples/invalid/bad_phi.hir b/hercules_samples/invalid/bad_phi.hir similarity index 100% rename from samples/invalid/bad_phi.hir rename to hercules_samples/invalid/bad_phi.hir diff --git a/samples/invalid/bad_phi2.hir b/hercules_samples/invalid/bad_phi2.hir similarity index 100% rename from samples/invalid/bad_phi2.hir rename to hercules_samples/invalid/bad_phi2.hir diff --git a/hercules_samples/matmul/Cargo.toml b/hercules_samples/matmul/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..945032691547814c74da7ac63e7d4a9f61a54196 --- /dev/null +++ b/hercules_samples/matmul/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "hercules_matmul" +version = "0.1.0" +authors = ["Russel Arbore <rarbore2@illinois.edu>"] + +[dependencies] +clap = { version = "*", features = ["derive"] } +hercules_rt = { path = "../../hercules_rt" } +rand = "*" diff --git a/samples/matmul.hir b/hercules_samples/matmul/matmul.hir similarity index 100% rename from samples/matmul.hir rename to hercules_samples/matmul/matmul.hir diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..02aeb3a4dc97d28bc45e9221f244e2b13f158bee --- /dev/null +++ b/hercules_samples/matmul/src/main.rs @@ -0,0 +1,34 @@ +extern crate clap; + +use std::path::Path; + +fn main() { + let module = hercules_rt::load_binary(Path::new("test.o")); + + let matmul = hercules_rt::lookup_function!( + module, + "matmul", + *const f32, + *const f32, + *mut f32, + u64, + u64, + u64, + => *const f32 + ); + + let a = [[1.0f32, 2.0f32], [3.0f32, 4.0f32]]; + let b = [[5.0f32, 6.0f32], [7.0f32, 8.0f32]]; + let mut c = [[0.0f32, 0.0f32], [0.0f32, 0.0f32]]; + unsafe { + matmul( + std::mem::transmute(a.as_ptr()), + std::mem::transmute(b.as_ptr()), + std::mem::transmute(c.as_mut_ptr()), + 2, + 2, + 2, + ) + }; + println!("{} {}\n{} {}", c[0][0], c[0][1], c[1][0], c[1][1]); +} diff --git a/samples/simple1.hir b/hercules_samples/simple1.hir similarity index 100% rename from samples/simple1.hir rename to hercules_samples/simple1.hir diff --git a/samples/strset.hir b/hercules_samples/strset.hir similarity index 100% rename from samples/strset.hir rename to hercules_samples/strset.hir