Skip to content
Snippets Groups Projects
Commit 8eb92dad authored by rarbore2's avatar rarbore2
Browse files

Dynamically load object files

parent 98303281
No related branches found
No related tags found
1 merge request!10Dynamically load object files
Showing with 346 additions and 7 deletions
/target
*.dot
*.bc
*.out
*.ll
*.c
*.o
......@@ -191,6 +191,15 @@ dependencies = [
"ordered-float",
]
[[package]]
name = "hercules_matmul"
version = "0.1.0"
dependencies = [
"clap",
"hercules_rt",
"rand",
]
[[package]]
name = "hercules_opt"
version = "0.1.0"
......@@ -198,6 +207,13 @@ dependencies = [
"hercules_ir",
]
[[package]]
name = "hercules_rt"
version = "0.1.0"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.153"
......
......@@ -4,6 +4,10 @@ members = [
"hercules_cg",
"hercules_ir",
"hercules_opt",
"hercules_rt",
"hercules_tools/hercules_dot",
"hercules_tools/hercules_cpu"
"hercules_tools/hercules_cpu",
"hercules_samples/matmul"
]
......@@ -165,7 +165,11 @@ pub fn cpu_beta_codegen<W: Write>(
}
}
// Step 4: do codegen for each function.
// Step 4: generate dummy zero-initialized global - this is needed so that
// there'll be a non-empty .bss section in the ELF object file.
write!(w, "@dummy = dso_local global i32 0, align 4\n")?;
// Step 5: do codegen for each function.
for function_idx in 0..functions.len() {
let function = &functions[function_idx];
let typing = &typing[function_idx];
......@@ -176,7 +180,7 @@ pub fn cpu_beta_codegen<W: Write>(
let fork_join_map = &fork_join_maps[function_idx];
let fork_join_nest = &fork_join_nests[function_idx];
// Step 4.1: emit function signature.
// Step 5.1: emit function signature.
let llvm_ret_type = &llvm_types[function.return_type.idx()];
let mut llvm_params = function
.param_types
......@@ -198,7 +202,7 @@ pub fn cpu_beta_codegen<W: Write>(
}
write!(w, ") {{\n")?;
// Step 4.2: emit basic blocks. A node represents a basic block if its
// Step 5.2: emit basic blocks. A node represents a basic block if its
// entry in the basic blocks vector points to itself. Each basic block
// is created as four strings: the block header, the block's phis, the
// block's data computations, and the block's terminator instruction.
......@@ -217,7 +221,7 @@ pub fn cpu_beta_codegen<W: Write>(
}
}
// Step 4.3: emit nodes. Nodes are emitted into basic blocks separately
// Step 5.3: emit nodes. Nodes are emitted into basic blocks separately
// as nodes are not necessarily emitted in order. Assemble worklist of
// nodes, starting as reverse post order of nodes. For non-phi and non-
// reduce nodes, only emit once all data uses are emitted. In addition,
......@@ -261,7 +265,7 @@ pub fn cpu_beta_codegen<W: Write>(
}
}
// Step 4.4: put basic blocks in order.
// Step 5.4: put basic blocks in order.
for node in reverse_postorder {
if bb[node.idx()] == *node {
write!(
......@@ -275,7 +279,7 @@ pub fn cpu_beta_codegen<W: Write>(
}
}
// Step 4.5: close function.
// Step 5.5: close function.
write!(w, "}}\n")?;
}
......
[package]
name = "hercules_rt"
version = "0.1.0"
authors = ["Russel Arbore <rarbore2@illinois.edu>"]
[dependencies]
libc = "*"
extern crate libc;
use std::ffi::CStr;
use std::mem::size_of;
use std::ptr::copy_nonoverlapping;
use std::ptr::null_mut;
use std::ptr::read_unaligned;
use self::libc::*;
/*
* The libc crate doesn't have everything from elf.h, so these things need to be
* manually defined.
*/
// One relocation entry as it is read out of the .rela.text section. The layout
// is fixed with repr(C) because entries are copied byte-for-byte out of the
// object file with read_unaligned.
#[repr(C)]
#[derive(Debug)]
struct Elf64_Rela {
// Offset of the patch site, measured from the start of the .text section.
r_offset: Elf64_Addr,
// Upper 32 bits: index into the symbol table. Lower 32 bits: relocation type.
r_info: Elf64_Xword,
// Signed constant added to the symbol's address when computing the patch.
r_addend: Elf64_Sxword,
}
// Relocation type resolved by parse_elf against the base of the mapped .bss.
const R_X86_64_PC32: u64 = 2;
// Relocation type resolved by parse_elf against the base of the mapped .text.
const R_X86_64_PLT32: u64 = 4;
// Symbol type (low 4 bits of st_info) identifying function symbols.
const STT_FUNC: u8 = 2;
/*
* Holds a mmaped copy of .text + .bss for direct execution, plus metadata for
* each function. The .bss section holds a table storing addresses to internal
* runtime functions, since this is literally easier than patching the object
* code to directly jump to those runtime functions.
*/
#[derive(Debug)]
pub(crate) struct Elf {
// Names of the function symbols found in the symbol table. Entry i belongs
// with entry i of function_pointers.
pub(crate) function_names: Vec<String>,
// st_value of each function symbol. Despite the name these are offsets: they
// get added to program_section to obtain a callable address.
pub(crate) function_pointers: Vec<isize>,
// Base address of the mmaped region; the .text copy starts here.
pub(crate) program_section: *mut u8,
// Length in bytes of the mmaped region, as passed to mmap and munmap.
pub(crate) program_size: usize,
}
impl Drop for Elf {
fn drop(&mut self) {
unsafe { munmap(self.program_section as *mut _, self.program_size) };
}
}
/*
* Function for parsing our internal memory representation of an ELF file from
* the raw bytes of an ELF file. This includes creating a executable section of
* code, and relocating function calls and global variables. This whole thing is
* very unsafe, and is predicated on the elf parameter referencing properly
* formatted bytes.
*/
pub(crate) unsafe fn parse_elf(elf: &[u8]) -> Elf {
fn page_align(n: usize) -> usize {
(n + (4096 - 1)) & !(4096 - 1)
}
// read_unaligned corresponds to memcpys in C - we need to memcpy structs
// out of the file's bytes, since they may be stored without proper
// alignment.
let header: Elf64_Ehdr = read_unaligned(elf.as_ptr() as *const _);
assert!(header.e_shentsize as usize == size_of::<Elf64_Shdr>());
let section_header_table: Box<[_]> = (0..header.e_shnum)
.map(|idx| {
read_unaligned(
(elf.as_ptr().offset(header.e_shoff as isize) as *const Elf64_Shdr)
.offset(idx as isize),
)
})
.collect();
// Look for the .symtab, .strtab, .text, .bss, and .rela.text sections. Only
// the .rela.text section is not necessary.
let mut symtab_ndx = -1;
let mut strtab_ndx = -1;
let mut text_ndx = -1;
let mut bss_ndx = -1;
let mut rela_text_ndx = -1;
let shstrtab = &elf[section_header_table[header.e_shstrndx as usize].sh_offset as usize..];
for i in 0..header.e_shnum as usize {
let section_name = &shstrtab[section_header_table[i].sh_name as usize..];
if section_name.starts_with(b".symtab") {
symtab_ndx = i as i32;
} else if section_name.starts_with(b".strtab") {
strtab_ndx = i as i32;
} else if section_name.starts_with(b".text") {
text_ndx = i as i32;
} else if section_name.starts_with(b".bss") {
bss_ndx = i as i32;
} else if section_name.starts_with(b".rela.text") {
rela_text_ndx = i as i32;
}
}
assert!(symtab_ndx != -1);
assert!(strtab_ndx != -1);
assert!(text_ndx != -1);
assert!(bss_ndx != -1);
// Get the headers for the required sections.
let symtab_hdr = section_header_table[symtab_ndx as usize];
let strtab_hdr = section_header_table[strtab_ndx as usize];
let text_hdr = section_header_table[text_ndx as usize];
let bss_hdr = section_header_table[bss_ndx as usize];
// Collect the symbols in the symbol table.
assert!(symtab_hdr.sh_entsize as usize == size_of::<Elf64_Sym>());
let num_symbols = symtab_hdr.sh_size as usize / size_of::<Elf64_Sym>();
let symbol_table: Box<[_]> = (0..num_symbols)
.map(|idx| {
read_unaligned(
(elf.as_ptr().offset(symtab_hdr.sh_offset as isize) as *const Elf64_Sym)
.offset(idx as isize),
)
})
.collect();
// The mmaped region includes both the .text and .bss sections.
let program_size = page_align(text_hdr.sh_size as usize) + page_align(bss_hdr.sh_size as usize);
let program_base = mmap(
null_mut(),
program_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0,
) as *mut u8;
let text_base = program_base;
let bss_base = text_base.offset(page_align(text_hdr.sh_size as usize) as isize);
// Copy the object code into the mmaped region.
copy_nonoverlapping(
elf.as_ptr().offset(text_hdr.sh_offset as isize),
text_base,
text_hdr.sh_size as usize,
);
// If there are relocations, we process them here.
if rela_text_ndx != -1 {
let rela_text_hdr = section_header_table[rela_text_ndx as usize];
let num_relocations = rela_text_hdr.sh_size / rela_text_hdr.sh_entsize;
// We only iterate the relocations in order, so no need to collect.
let relocations = (0..num_relocations).map(|idx| {
read_unaligned(
(elf.as_ptr().offset(rela_text_hdr.sh_offset as isize) as *const Elf64_Rela)
.offset(idx as isize),
)
});
for relocation in relocations {
let symbol_idx = relocation.r_info >> 32;
let ty = relocation.r_info & 0xFFFFFFFF;
let patch_offset = text_base.offset(relocation.r_offset as isize);
// We support PLT32 relocations only in the .text section, and PC32
// relocations only in the .bss section.
match ty {
R_X86_64_PLT32 => {
let symbol_address =
text_base.offset(symbol_table[symbol_idx as usize].st_value as isize);
let patch = symbol_address
.offset(relocation.r_addend as isize)
.offset_from(patch_offset);
(patch_offset as *mut u32).write_unaligned(patch as u32);
}
R_X86_64_PC32 => {
let symbol_address =
bss_base.offset(symbol_table[symbol_idx as usize].st_value as isize);
let patch = symbol_address
.offset(relocation.r_addend as isize)
.offset_from(patch_offset);
(patch_offset as *mut u32).write_unaligned(patch as u32);
}
_ => panic!("ERROR: Unrecognized relocation type: {}.", ty),
}
}
}
// Make the .text section readable and executable. The .bss section should
// still be readable and writable.
mprotect(
text_base as *mut c_void,
page_align(text_hdr.sh_size as usize),
PROT_READ | PROT_EXEC,
);
// Construct the final in-memory ELF representation. Look up the names of
// function symbols in the string table.
let strtab = &elf[strtab_hdr.sh_offset as usize..];
let mut elf = Elf {
function_names: vec![],
function_pointers: vec![],
program_section: program_base,
program_size,
};
for i in 0..num_symbols {
if symbol_table[i].st_info & 0xF == STT_FUNC {
let function_name_base = &strtab[symbol_table[i].st_name as usize..];
let function_name = CStr::from_ptr(function_name_base.as_ptr() as *const _)
.to_str()
.unwrap()
.to_owned();
elf.function_names.push(function_name);
elf.function_pointers
.push(symbol_table[i].st_value as isize);
}
}
elf
}
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
pub(crate) mod elf;
pub(crate) use crate::elf::*;
/*
 * A loaded object file. Wraps the in-memory ELF representation produced by
 * parse_elf; function addresses are looked up through get_function_ptr.
 */
#[derive(Debug)]
pub struct Module {
// The parsed and mapped object file.
elf: Elf,
}
impl Module {
    /*
     * Returns the address of the function called `name` inside the module's
     * mapped program section. The first function symbol with a matching name
     * is used.
     *
     * Panics if the module has no function symbol called `name`.
     */
    pub fn get_function_ptr(&self, name: &str) -> *mut u8 {
        let function_idx = self
            .elf
            .function_names
            .iter()
            .position(|s| s == name)
            .unwrap_or_else(|| panic!("ERROR: Function {} not found in module.", name));
        let function_offset = self.elf.function_pointers[function_idx];
        // The recorded value is the symbol's st_value, which is applied here
        // as an offset from the base of the mapped region.
        unsafe { self.elf.program_section.offset(function_offset) }
    }
}
/*
 * Reads the object file at `path` into memory and parses it into a Module.
 *
 * Panics if the file can't be opened or read, reporting the path and the
 * underlying I/O error, or if parse_elf rejects the contents.
 */
pub fn load_binary(path: &Path) -> Module {
    let mut f = File::open(path)
        .unwrap_or_else(|err| panic!("ERROR: Couldn't open {}: {}.", path.display(), err));
    let mut buffer = vec![];
    f.read_to_end(&mut buffer)
        .unwrap_or_else(|err| panic!("ERROR: Couldn't read {}: {}.", path.display(), err));
    // parse_elf trusts these bytes to be a properly formatted object file.
    let elf = unsafe { parse_elf(buffer.as_slice()) };
    Module { elf }
}
/*
 * An ugly, unchecked macro for looking up Hercules functions in a module. Curse
 * Rust for not supporting variadic generics and type pattern matching :shrug:.
 * TODO: Generate per-lookup struct type for checking that the provided types
 * are correct.
 *
 * Usage: lookup_function!(module, "name", ParamTy1, ParamTy2, => RetTy). Note
 * the comma before the arrow. The result is an `extern "C"` function pointer:
 * the loaded object code uses the C calling convention, and the calling
 * convention of plain Rust `fn` pointers is unspecified. Nothing checks that
 * the given types match the real signature of the loaded function.
 */
#[macro_export]
macro_rules! lookup_function {
    ($module:expr, $function:expr, $($param_ty:ty),*, => $ret_ty:ty) => {
        {
            // Evaluate the caller's expressions outside of the unsafe block, so
            // that it covers nothing but the transmute.
            let raw_ptr: *mut u8 = $module.get_function_ptr($function);
            let fn_ptr: extern "C" fn($($param_ty),*) -> $ret_ty =
                unsafe { ::std::mem::transmute(raw_ptr) };
            fn_ptr
        }
    };
}
File moved
File moved
File moved
File moved
[package]
name = "hercules_matmul"
version = "0.1.0"
authors = ["Russel Arbore <rarbore2@illinois.edu>"]
[dependencies]
clap = { version = "*", features = ["derive"] }
hercules_rt = { path = "../../hercules_rt" }
rand = "*"
File moved
extern crate clap;
use std::path::Path;
/*
 * Loads test.o from the current directory, looks up its matmul function, runs
 * it on two hard-coded 2x2 matrices, and prints the four entries of the output
 * matrix.
 */
fn main() {
    let module = hercules_rt::load_binary(Path::new("test.o"));
    let matmul = hercules_rt::lookup_function!(
        module,
        "matmul",
        *const f32,
        *const f32,
        *mut f32,
        u64,
        u64,
        u64,
        => *const f32
    );
    let a = [[1.0f32, 2.0f32], [3.0f32, 4.0f32]];
    let b = [[5.0f32, 6.0f32], [7.0f32, 8.0f32]];
    let mut c = [[0.0f32, 0.0f32], [0.0f32, 0.0f32]];
    // The nested arrays are contiguous, so a pointer to the first row can be
    // handed over as a pointer to the first element. The returned pointer is
    // not used; the result is read back out of c.
    // NOTE(review): the three u64 arguments are presumably the matrix
    // dimensions - confirm against the matmul IR.
    unsafe {
        matmul(
            a.as_ptr().cast::<f32>(),
            b.as_ptr().cast::<f32>(),
            c.as_mut_ptr().cast::<f32>(),
            2,
            2,
            2,
        )
    };
    println!("{} {}\n{} {}", c[0][0], c[0][1], c[1][0], c[1][1]);
}
File moved
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment