diff --git a/.gitignore b/.gitignore index 507684b6bfd3372427f56fbe2cea93961333b2a3..f0f409c246fa6e8fa3e4c862959e184eb3556108 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target *.dot +*.bc diff --git a/Cargo.lock b/Cargo.lock index 1e301dc0b414a284180c5ff73e6f0425537585f8..9a0ebed2f7ee50e8cebbe0ff4d82c99f68f00c6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.5.0" @@ -68,6 +77,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -120,6 +138,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "ena" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" +dependencies = [ + "log", +] + [[package]] name = "funty" version = "2.0.0" @@ -147,7 +180,10 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" name = "hercules_codegen" version = "0.1.0" dependencies = [ + "bitvec", + "ena", "hercules_ir", + "inkwell", ] [[package]] @@ -177,12 +213,60 @@ dependencies = [ "rand", ] +[[package]] +name = "inkwell" +version = 
"0.2.0" +source = "git+https://github.com/TheDan64/inkwell?branch=master#7a09ad8a5f3b1fc416f95b5e1c97d33df0ab3f06" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "thiserror", +] + +[[package]] +name = "inkwell_internals" +version = "0.8.0" +source = "git+https://github.com/TheDan64/inkwell?branch=master#7a09ad8a5f3b1fc416f95b5e1c97d33df0ab3f06" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +[[package]] +name = "llvm-sys" +version = "160.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf51981ac0622b10fe4790763e3de1f3d68a0ee4222e03accaaab6731bd508d" +dependencies = [ + "cc", + "lazy_static", + "libc", + "regex", + "semver", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "memchr" version = "2.6.3" @@ -214,6 +298,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + [[package]] name = "ordered-float" version = "3.9.1" @@ -283,6 +373,41 @@ dependencies = [ "getrandom", ] +[[package]] +name = "regex" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", 
+] + +[[package]] +name = "regex-automata" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "semver" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad977052201c6de01a8ef2aa3378c4bd23217a056337d1d6da40468d267a4fb0" + [[package]] name = "strsim" version = "0.10.0" @@ -306,6 +431,26 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "thiserror" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.11" diff --git a/hercules_codegen/Cargo.toml b/hercules_codegen/Cargo.toml index bd50b13f981284a2b4c13e4fc16f7d23a2b43e45..dfc4f7d19adf3e403e2bef5878b73fe02e1a0cb8 100644 --- a/hercules_codegen/Cargo.toml +++ b/hercules_codegen/Cargo.toml @@ -4,4 +4,7 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] [dependencies] +bitvec = "*" +ena = "*" +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm16-0-prefer-dynamic"] } hercules_ir = { path = "../hercules_ir" } diff --git 
a/hercules_codegen/src/antideps.rs b/hercules_codegen/src/antideps.rs new file mode 100644 index 0000000000000000000000000000000000000000..ff748ce8b93d2294f6aa55dda5bc4281f17a8fa5 --- /dev/null +++ b/hercules_codegen/src/antideps.rs @@ -0,0 +1,52 @@ +extern crate hercules_ir; + +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; + +/* + * Top level function to assemble anti-dependence edges. Returns a list of pairs + * of nodes. The first item in the pair is the read node, and the second item is + * the write node. + */ +pub fn antideps(function: &Function, def_use: &ImmutableDefUseMap) -> Vec<(NodeID, NodeID)> { + // Array typed values are not directly computed on. Thus, there are actually + // very few nodes that have array inputs or output. As a result, when + // forming anti-dependencies for a single allocation, we only need to + // consider immediate users that are read or write nodes - no proper + // dataflow analysis necessary. + let mut antideps = vec![]; + + for id in (0..function.nodes.len()).map(NodeID::new) { + // We only need to consider array reads and writes. + let users = def_use.get_users(id); + let reads = users.iter().filter(|user| { + if let Node::ReadArray { array, index: _ } = function.nodes[user.idx()] { + array == id + } else { + false + } + }); + let mut writes = users.iter().filter(|user| { + if let Node::WriteArray { + array, + index: _, + data: _, + } = function.nodes[user.idx()] + { + array == id + } else { + false + } + }); + + // If there are any writes, compute the anti dependence edges. 
+ if let Some(write) = writes.next() { + for read in reads { + antideps.push((*read, *write)); + } + } + assert!(writes.next() == None, "Can't form anti-dependencies when there are two independent writes depending on a single array value."); + } + + antideps +} diff --git a/hercules_codegen/src/array_alloc.rs b/hercules_codegen/src/array_alloc.rs new file mode 100644 index 0000000000000000000000000000000000000000..3ce9fcc13ed242fc0f2d4503340b68561745fe0d --- /dev/null +++ b/hercules_codegen/src/array_alloc.rs @@ -0,0 +1,214 @@ +extern crate ena; +extern crate hercules_ir; + +use std::collections::HashMap; + +use self::ena::unify::*; + +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; + +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +struct UnitKey(u32); + +impl UnifyKey for UnitKey { + type Value = (); + + fn index(&self) -> u32 { + self.0 + } + + fn from_index(u: u32) -> UnitKey { + UnitKey(u) + } + + fn tag() -> &'static str { + "UnitKey" + } +} + +/* + * Top level function to allocate individual arrays for sets of nodes in the IR + * graph. Doesn't attempt to overlap allocations w/ liveness analysis. Returns + * a set of nodes per allocation, which are the nodes that use that allocation, + * along with dimensions specified with dynamic constants. The return value + * consists of a list of array allocations (described by their dynamic constant + * size), and a map from nodes involving array values to the number of the array + * allocation they operate on. + */ +pub fn logical_array_alloc( + function: &Function, + typing: &Vec<TypeID>, + types: &Vec<Type>, + fork_join_map: &HashMap<NodeID, NodeID>, + bbs: &Vec<NodeID>, + fork_join_nests: &HashMap<NodeID, Vec<NodeID>>, +) -> (Vec<Vec<DynamicConstantID>>, HashMap<NodeID, usize>) { + // Step 1: filter nodes that operate on arrays, either on their input or + // their output. 
+ let id_outputs_array = |id: &NodeID| types[typing[id.idx()].idx()].is_array(); + let array_nodes: Vec<_> = (0..function.nodes.len()) + .map(NodeID::new) + .filter(|id| { + id_outputs_array(id) + || get_uses(&function.nodes[id.idx()]) + .as_ref() + .into_iter() + .any(id_outputs_array) + }) + .collect(); + let array_node_numbers: HashMap<_, _> = + std::iter::zip(array_nodes.iter().map(|x| *x), 0..array_nodes.len()).collect(); + + // Step 2: union find the nodes based on use edges. Every node in each set + // should use the same array allocation. The representative node for a set + // will be the node with the smallest ID. + let mut allocs: UnificationTable<InPlace<UnitKey>> = UnificationTable::new(); + let keys: Vec<_> = (0..array_nodes.len()).map(|_| allocs.new_key(())).collect(); + for node in array_nodes.iter() { + for array_use in get_uses(&function.nodes[node.idx()]) + .as_ref() + .into_iter() + .map(|x| *x) + .filter(id_outputs_array) + { + allocs.union( + keys[array_node_numbers[&node]], + keys[array_node_numbers[&array_use]], + ); + } + } + + // Step 3: determine the size of each array allocation. This is the size of + // the array type operated on, possibly in addition to dynamic constant + // factors corresponding to uses inside fork / joins. Each node that can + // operate on array values, and their corresponding affect on the array + // value's size, are listed below. 
+ // + // Phi: only provides the base array dimensions + // Collect: provides the base array dimensions, in addition to dimensions + // corresponding to dominating fork / join nests + // Return: provides no array dimensions + // Parameter: only provides the base array dimensions + // Constant: only provides the base array dimensions + // Call: TODO + // ReadArray: only provides the base array dimensions + // WriteArray: provides the base array dimensions, in addition to dimensions + // corresponding to each fork / join the node is nested in + let mut key_to_value_size: HashMap<UnitKey, Vec<DynamicConstantID>> = HashMap::new(); + for key in keys.iter() { + let value_key = allocs.find(*key); + let id = array_nodes[key.index() as usize]; + + let extents = match function.nodes[id.idx()] { + Node::Phi { + control: _, + data: _, + } + | Node::Parameter { index: _ } + | Node::Constant { id: _ } + | Node::ReadArray { array: _, index: _ } => { + // For nodes that don't write to the array, the required size + // is just the underlying size of the array. + type_extents(typing[id.idx()], types) + } + Node::Collect { + control: _, + data: _, + } + | Node::WriteArray { + array: _, + data: _, + index: _, + } => { + // For nodes that write to the array, the required size depends + // on the surrounding fork / join pairs. + write_dimensionality( + function, + id, + typing, + types, + fork_join_map, + bbs, + fork_join_nests, + ) + } + Node::Return { + control: _, + data: _, + } => { + continue; + } + _ => todo!(), + }; + + // The largest required size is the correct size. It is assumed that all + // sizes calculated above form a total order with respect to suffix + // vectoring. 
+ if let Some(old_extents) = key_to_value_size.get(&value_key) { + assert!( + std::iter::zip(old_extents.iter().rev(), extents.iter().rev()).all(|(a, b)| a == b) + ); + if old_extents.len() < extents.len() { + key_to_value_size.insert(value_key, extents); + } + } else { + key_to_value_size.insert(value_key, extents); + } + } + + // Step 4: collect array allocations as a numbered list. Map from array + // nodes to the array allocation number they use. + let mut logical_allocations: Vec<Vec<DynamicConstantID>> = vec![]; + let mut key_to_number: HashMap<UnitKey, usize> = HashMap::new(); + for (key, array_value) in key_to_value_size { + key_to_number.insert(key, logical_allocations.len()); + logical_allocations.push(array_value); + } + let mut node_to_logical_numbers: HashMap<NodeID, usize> = HashMap::new(); + for node in array_nodes.iter() { + node_to_logical_numbers.insert( + *node, + key_to_number[&allocs.find(keys[array_node_numbers[node]])], + ); + } + + (logical_allocations, node_to_logical_numbers) +} + +/* + * Get the dimensionality of a write. Checks for the dimensions of the array + * type, and adds dimensions corresponding to dominating fork / join nests. + */ +pub fn write_dimensionality( + function: &Function, + write: NodeID, + typing: &Vec<TypeID>, + types: &Vec<Type>, + fork_join_map: &HashMap<NodeID, NodeID>, + bbs: &Vec<NodeID>, + fork_join_nests: &HashMap<NodeID, Vec<NodeID>>, +) -> Vec<DynamicConstantID> { + let mut extents = type_extents(typing[write.idx()], types); + assert!( + extents.len() > 0, + "Can't call write_dimensionality with a node that doesn't output an array." + ); + extents.reverse(); + + for fork in fork_join_nests[&bbs[write.idx()]].iter() { + if let Node::Fork { control: _, factor } = function.nodes[fork.idx()] { + // If this node is a collect, we don't need to add the dimension + // from the corresponding fork. 
+ if function.nodes[write.idx()].is_collect() && fork_join_map[&fork] != bbs[write.idx()] + { + extents.push(factor); + } + } else { + panic!("Fork join nests map contains a non-fork in the value list."); + } + } + + extents.reverse(); + extents +} diff --git a/hercules_codegen/src/cpu_alpha.rs b/hercules_codegen/src/cpu_alpha.rs new file mode 100644 index 0000000000000000000000000000000000000000..d08ed6d23f8fe082220048e1d7aa965608e53663 --- /dev/null +++ b/hercules_codegen/src/cpu_alpha.rs @@ -0,0 +1,1163 @@ +extern crate bitvec; +extern crate hercules_ir; +extern crate inkwell; + +use std::collections::HashMap; +use std::collections::VecDeque; +use std::convert::TryFrom; +use std::iter::repeat; +use std::iter::zip; + +use self::inkwell::basic_block::*; +use self::inkwell::builder::*; +use self::inkwell::context::*; +use self::inkwell::types::*; +use self::inkwell::values::*; +use self::inkwell::*; + +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; + +/* + * This CPU backend was written to get some Hercules IR running, and to better + * understand how writing backends for Hercules IR will work. This backend is + * not meant to be used in the long term. If you are reading this code in a + * significant amount of time from when this comment was written, you are + * probably already doing something wrong - Russel. + */ + +/* + * Top level function to generate code for a module. Writes the result object + * file to the specified path. 
+ */ +pub fn cpu_alpha_codegen( + module: &hercules_ir::ir::Module, + typing: &hercules_ir::typecheck::ModuleTyping, + reverse_postorders: &Vec<Vec<NodeID>>, + def_uses: &Vec<ImmutableDefUseMap>, + bbs: &Vec<Vec<NodeID>>, + antideps: &Vec<Vec<(NodeID, NodeID)>>, + array_allocations: &Vec<(Vec<Vec<DynamicConstantID>>, HashMap<NodeID, usize>)>, + fork_join_nests: &Vec<HashMap<NodeID, Vec<NodeID>>>, + path: &std::path::Path, +) { + let hercules_ir::ir::Module { + functions, + types, + constants, + dynamic_constants, + } = module; + + // Step 1: initialize LLVM objects. + let llvm_context = Context::create(); + let llvm_module = llvm_context.create_module(""); + let llvm_builder = llvm_context.create_builder(); + + // Step 2: convert the types. This requires translating from our interning + // structures to LLVM's. We can't just blow through the types vector, since + // a type may reference a type ID ahead of it in the vector. Instead, + // iterate types in a bottom up order with respect to the type intern DAGs. 
+ let mut llvm_types = vec![llvm_context.i8_type().as_basic_type_enum(); types.len()]; + for id in module.types_bottom_up() { + match &types[id.idx()] { + Type::Control(_) => {} + Type::Boolean => { + llvm_types[id.idx()] = llvm_context.bool_type().as_basic_type_enum(); + } + Type::Integer8 | Type::UnsignedInteger8 => { + llvm_types[id.idx()] = llvm_context.i8_type().as_basic_type_enum(); + } + Type::Integer16 | Type::UnsignedInteger16 => { + llvm_types[id.idx()] = llvm_context.i16_type().as_basic_type_enum(); + } + Type::Integer32 | Type::UnsignedInteger32 => { + llvm_types[id.idx()] = llvm_context.i32_type().as_basic_type_enum(); + } + Type::Integer64 | Type::UnsignedInteger64 => { + llvm_types[id.idx()] = llvm_context.i64_type().as_basic_type_enum(); + } + Type::Float32 => { + llvm_types[id.idx()] = llvm_context.f32_type().as_basic_type_enum(); + } + Type::Float64 => { + llvm_types[id.idx()] = llvm_context.f64_type().as_basic_type_enum(); + } + // Because we traverse in bottom-up order, we can assume that the + // LLVM types for children types are already computed. + Type::Product(fields) => { + let field_types = fields + .iter() + .map(|id| llvm_types[id.idx()]) + .collect::<Box<[_]>>(); + llvm_types[id.idx()] = llvm_context + .struct_type(&field_types, false) + .as_basic_type_enum(); + } + Type::Array(elem, _) => { + // Array types need to be flattened - an array of an array in + // Hercules IR needs to translate to a single pointer in LLVM. + if let Type::Array(_, _) = types[elem.idx()] { + llvm_types[id.idx()] = llvm_types[elem.idx()]; + } else { + let elem_type = llvm_types[elem.idx()]; + llvm_types[id.idx()] = elem_type + .ptr_type(AddressSpace::default()) + .as_basic_type_enum(); + } + } + Type::Summation(_) => todo!(), + } + } + + // Step 3: convert the constants. This is done in a very similar manner as + // types. 
+ let mut llvm_constants = vec![ + llvm_context + .i8_type() + .const_int(0, false) + .as_basic_value_enum(); + constants.len() + ]; + for id in module.constants_bottom_up() { + match &constants[id.idx()] { + Constant::Boolean(val) => { + llvm_constants[id.idx()] = llvm_context + .bool_type() + .const_int(*val as u64, false) + .as_basic_value_enum(); + } + Constant::Integer8(val) => { + llvm_constants[id.idx()] = llvm_context + .i8_type() + .const_int(*val as u64, true) + .as_basic_value_enum(); + } + Constant::Integer16(val) => { + llvm_constants[id.idx()] = llvm_context + .i16_type() + .const_int(*val as u64, true) + .as_basic_value_enum(); + } + Constant::Integer32(val) => { + llvm_constants[id.idx()] = llvm_context + .i32_type() + .const_int(*val as u64, true) + .as_basic_value_enum(); + } + Constant::Integer64(val) => { + llvm_constants[id.idx()] = llvm_context + .i64_type() + .const_int(*val as u64, true) + .as_basic_value_enum(); + } + Constant::UnsignedInteger8(val) => { + llvm_constants[id.idx()] = llvm_context + .i8_type() + .const_int(*val as u64, false) + .as_basic_value_enum(); + } + Constant::UnsignedInteger16(val) => { + llvm_constants[id.idx()] = llvm_context + .i16_type() + .const_int(*val as u64, false) + .as_basic_value_enum(); + } + Constant::UnsignedInteger32(val) => { + llvm_constants[id.idx()] = llvm_context + .i32_type() + .const_int(*val as u64, false) + .as_basic_value_enum(); + } + Constant::UnsignedInteger64(val) => { + llvm_constants[id.idx()] = llvm_context + .i64_type() + .const_int(*val, false) + .as_basic_value_enum(); + } + Constant::Float32(val) => { + llvm_constants[id.idx()] = llvm_context + .f32_type() + .const_float(val.into_inner() as f64) + .as_basic_value_enum(); + } + Constant::Float64(val) => { + llvm_constants[id.idx()] = llvm_context + .f64_type() + .const_float(val.into_inner()) + .as_basic_value_enum(); + } + // Because we traverse in bottom-up order, we can assume that the + // LLVM constants for children constants 
are already computed. + Constant::Product(_, fields) => { + let field_constants = fields + .iter() + .map(|id| llvm_constants[id.idx()]) + .collect::<Box<[_]>>(); + llvm_constants[id.idx()] = llvm_context + .const_struct(&field_constants, false) + .as_basic_value_enum(); + } + Constant::Array(_, _) => todo!(), + Constant::Summation(_, _, _) => todo!(), + } + } + + // Step 4: do codegen for each function. + for function_idx in 0..functions.len() { + let function = &functions[function_idx]; + let typing = &typing[function_idx]; + let reverse_postorder = &reverse_postorders[function_idx]; + let def_use = &def_uses[function_idx]; + let bb = &bbs[function_idx]; + let antideps = &antideps[function_idx]; + let fork_join_nest = &fork_join_nests[function_idx]; + let array_allocations = &array_allocations[function_idx]; + + // Step 4.1: create LLVM function object. + let llvm_ret_type = llvm_types[function.return_type.idx()]; + let llvm_param_types = function + .param_types + .iter() + .map(|id| llvm_types[id.idx()].into()) + .chain( + repeat(BasicMetadataTypeEnum::try_from(llvm_context.i64_type()).unwrap()) + .take(function.num_dynamic_constants as usize), + ) + .chain( + repeat( + BasicMetadataTypeEnum::try_from( + llvm_context.i8_type().ptr_type(AddressSpace::default()), + ) + .unwrap(), + ) + .take(array_allocations.0.len() as usize), + ) + .collect::<Box<[_]>>(); + let llvm_fn_type = llvm_ret_type.fn_type(&llvm_param_types, false); + let llvm_fn = llvm_module.add_function(&function.name, llvm_fn_type, None); + + // Step 4.2: create LLVM basic blocks. A node needs a corresponding + // basic block if its entry in the basic blocks vector points to iself. + let mut llvm_bbs = HashMap::new(); + for id in (0..function.nodes.len()).map(NodeID::new) { + if bb[id.idx()] == id { + llvm_bbs.insert( + id, + llvm_context.append_basic_block(llvm_fn, &format!("bb_{}", id.idx())), + ); + } + } + + // Step 4.3: emit LLVM for each node. 
Assemble worklist of nodes, + // starting as reverse post order of nodes. For non-phi nodes, only emit + // once all data uses are emitted. In addition, consider additional anti + // dependence edges from read to write nodes. + let mut values = HashMap::new(); + let mut phi_values = HashMap::new(); + let mut branch_instructions = HashMap::new(); + let mut worklist = VecDeque::from(reverse_postorder.clone()); + while let Some(id) = worklist.pop_front() { + if !function.nodes[id.idx()].is_phi() + && !get_uses(&function.nodes[id.idx()]) + .as_ref() + .into_iter() + .chain( + antideps.iter().filter_map( + |(read, write)| if id == *write { Some(read) } else { None }, + ), + ) + .all(|x| function.is_control(*x) || values.contains_key(x)) + { + // Skip emitting node if it's not a phi node and if its data + // uses are not emitted yet. + worklist.push_back(id); + } else { + // Once all of the data dependencies for this node are emitted, + // this node can be emitted. + emit_llvm_for_node( + id, + &mut values, + &mut phi_values, + &mut branch_instructions, + function, + typing, + types, + dynamic_constants, + bb, + def_use, + fork_join_nest, + array_allocations, + &llvm_context, + &llvm_builder, + llvm_fn, + &llvm_bbs, + &llvm_types, + &llvm_constants, + ); + } + } + + // Step 4.4: patch phi nodes with incoming data values. + for id in (0..function.nodes.len()).map(NodeID::new) { + let node = &function.nodes[id.idx()]; + if node.is_phi() { + // Region node is the only strictly control use of the phi. + let uses = get_uses(node); + let region = uses + .as_ref() + .iter() + .filter(|id| function.nodes[id.idx()].is_strictly_control()) + .next() + .unwrap(); + + // Need to create intermediate vector - Inkwell expects a list + // of dynamic references to basic values. Those references must + // reference concrete basic values, which we need to create. + // Thus, we need to store them in this intermediate vector. 
+ let data_uses: Vec<_> = uses + .as_ref() + .iter() + .filter(|id| !function.nodes[id.idx()].is_strictly_control()) + .map(|id| BasicValueEnum::try_from(values[id]).unwrap()) + .collect(); + let data_uses = data_uses + .iter() + .map(|ref_value| ref_value as &dyn BasicValue); + + // The basic blocks are the uses of the region node. + let region_uses = get_uses(&function.nodes[region.idx()]); + let pred_bbs = region_uses.as_ref().iter().map(|x| llvm_bbs[&bb[x.idx()]]); + + // The order of the data uses of the phi corresponds with the + // order of the control uses of the region. + let incoming_values: Vec<_> = zip(data_uses, pred_bbs).collect(); + phi_values[&id].add_incoming(&incoming_values[..]); + } + } + } + + // Step 5: write out module to given file path. + llvm_module.write_bitcode_to_path(path); +} + +/* + * Emit LLVM implementing a single node. + */ +fn emit_llvm_for_node<'ctx>( + id: NodeID, + values: &mut HashMap<NodeID, AnyValueEnum<'ctx>>, + phi_values: &mut HashMap<NodeID, PhiValue<'ctx>>, + branch_instructions: &mut HashMap<BasicBlock<'ctx>, InstructionValue<'ctx>>, + function: &Function, + typing: &Vec<TypeID>, + types: &Vec<Type>, + dynamic_constants: &Vec<DynamicConstant>, + bb: &Vec<NodeID>, + def_use: &ImmutableDefUseMap, + fork_join_nest: &HashMap<NodeID, Vec<NodeID>>, + array_allocations: &(Vec<Vec<DynamicConstantID>>, HashMap<NodeID, usize>), + llvm_context: &'ctx Context, + llvm_builder: &'ctx Builder, + llvm_fn: FunctionValue<'ctx>, + llvm_bbs: &HashMap<NodeID, BasicBlock<'ctx>>, + llvm_types: &Vec<BasicTypeEnum<'ctx>>, + llvm_constants: &Vec<BasicValueEnum<'ctx>>, +) { + // Helper to emit code for dynamic constants. 
+ let emit_dynamic_constant = + |dyn_cons_id: DynamicConstantID| match dynamic_constants[dyn_cons_id.idx()] { + DynamicConstant::Constant(val) => llvm_context + .i64_type() + .const_int(val as u64, false) + .as_any_value_enum(), + DynamicConstant::Parameter(num) => llvm_fn + .get_nth_param((num + function.param_types.len()) as u32) + .unwrap() + .as_any_value_enum(), + }; + + // Helper to get array allocation. + let get_array_alloc = |idx: usize| { + llvm_fn + .get_nth_param( + (idx + function.param_types.len() + function.num_dynamic_constants as usize) as u32, + ) + .unwrap() + .as_any_value_enum() + }; + + // Helper to position at the beginning of a basic block. + let position_at_beginning = |bb: BasicBlock<'ctx>| { + if let Some(first_inst) = bb.get_first_instruction() { + llvm_builder.position_before(&first_inst); + } else { + llvm_builder.position_at_end(bb); + } + }; + + let llvm_bb = llvm_bbs[&bb[id.idx()]]; + if let Some(iv) = branch_instructions.get(&llvm_bb) { + llvm_builder.position_before(iv); + } else { + llvm_builder.position_at_end(llvm_bb); + } + match function.nodes[id.idx()] { + Node::Start | Node::Region { preds: _ } => { + let successor = def_use + .get_users(id) + .iter() + .filter(|id| function.nodes[id.idx()].is_strictly_control()) + .next() + .unwrap(); + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_unconditional_branch(llvm_bbs[successor]) + .unwrap(), + ); + } + Node::If { control: _, cond } => { + let successors = def_use.get_users(id); + if function.nodes[successors[0].idx()] == (Node::ReadProd { prod: id, index: 0 }) { + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_conditional_branch( + values[&cond].into_int_value(), + llvm_bbs[&bb[successors[1].idx()]], + llvm_bbs[&bb[successors[0].idx()]], + ) + .unwrap(), + ); + } else { + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_conditional_branch( + values[&cond].into_int_value(), + llvm_bbs[&bb[successors[0].idx()]], + 
llvm_bbs[&bb[successors[1].idx()]], + ) + .unwrap(), + ); + } + } + Node::Fork { + control: _, + factor: _, + } => { + // Need to create phi node for the loop index. + let phi_value = llvm_builder.build_phi(llvm_context.i64_type(), "").unwrap(); + phi_values.insert(id, phi_value); + + let successor = def_use + .get_users(id) + .iter() + .filter(|id| function.nodes[id.idx()].is_strictly_control()) + .next() + .unwrap(); + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_unconditional_branch(llvm_bbs[successor]) + .unwrap(), + ); + } + Node::Join { control } => { + // Form the bottom of the loop. We need to branch between the + // successor and the fork. + let fork_id = if let Type::Control(factors) = &types[typing[control.idx()].idx()] { + *factors.last().unwrap() + } else { + panic!() + }; + let phi_value = phi_values[&fork_id]; + let (fork_predecessor, factor) = + if let Node::Fork { control, factor } = &function.nodes[fork_id.idx()] { + (*control, *factor) + } else { + panic!() + }; + + // Create a serial loop with a simple index. + let bound = emit_dynamic_constant(factor); + let new_index = llvm_builder + .build_int_add( + phi_value.as_any_value_enum().into_int_value(), + llvm_context.i64_type().const_int(1, false), + "", + ) + .unwrap(); + phi_value.add_incoming(&[ + ( + &llvm_context.i64_type().const_int(0, false), + llvm_bbs[&bb[fork_predecessor.idx()]], + ), + (&new_index, llvm_bbs[&bb[id.idx()]]), + ]); + + // Create branch forming the serial loop. 
+ let condition = llvm_builder + .build_int_compare(IntPredicate::ULT, new_index, bound.into_int_value(), "") + .unwrap(); + let successor = def_use + .get_users(id) + .iter() + .filter(|id| function.nodes[id.idx()].is_strictly_control()) + .next() + .unwrap(); + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_conditional_branch( + condition, + llvm_bbs[&bb[fork_id.idx()]], + llvm_bbs[&bb[successor.idx()]], + ) + .unwrap(), + ); + } + Node::Phi { + control: _, + data: _, + } => { + // For some reason, Inkwell doesn't convert phi values to/from the + // AnyValueEnum type properly, so store phi values in another map. + position_at_beginning(llvm_bb); + let phi_value = llvm_builder + .build_phi(llvm_types[typing[id.idx()].idx()], "") + .unwrap(); + phi_values.insert(id, phi_value); + values.insert(id, phi_value.as_any_value_enum()); + } + Node::ThreadID { control } => { + let phi_value = phi_values[&control]; + values.insert(id, phi_value.as_any_value_enum()); + } + Node::Collect { control, data } => { + // Write into destination array only in inner-most collect. Outer + // collects become no-ops. + let elem_type = typing[data.idx()]; + if !types[elem_type.idx()].is_array() { + // Get all the thread IDs of the nested forks. These are the phi + // values corresponding to each fork. + let thread_ids = fork_join_nest[&control] + .iter() + .map(|fork| phi_values[&fork]); + let alloc_num = array_allocations.1[&id]; + let extents = &array_allocations.0[alloc_num]; + let mut write_index = llvm_context.i64_type().const_int(0, false); + let mut multiplier = llvm_context.i64_type().const_int(1, false); + for (thread_id, extent) in zip(thread_ids, extents.iter().rev()) { + // Add contribution of this index dimension to flat write + // index. 
+ write_index = llvm_builder + .build_int_add( + write_index, + llvm_builder + .build_int_mul( + multiplier, + thread_id.as_any_value_enum().into_int_value(), + "", + ) + .unwrap(), + "", + ) + .unwrap(); + + // Keep running multiplication of extents seen so far. + multiplier = llvm_builder + .build_int_mul( + multiplier, + emit_dynamic_constant(*extent).into_int_value(), + "", + ) + .unwrap(); + } + + // Emit the write. + let array = get_array_alloc(alloc_num); + let ptr_type = llvm_types[typing[data.idx()].idx()]; + let gep_ptr = unsafe { + llvm_builder + .build_gep(ptr_type, array.into_pointer_value(), &[write_index], "") + .unwrap() + }; + llvm_builder + .build_store(gep_ptr, BasicValueEnum::try_from(values[&data]).unwrap()) + .unwrap(); + + values.insert(id, array); + } else { + values.insert(id, values[&data]); + } + } + Node::Return { control: _, data } => { + llvm_builder + .build_return(Some(&BasicValueEnum::try_from(values[&data]).unwrap())) + .unwrap(); + } + Node::Parameter { index } => { + values.insert( + id, + llvm_fn + .get_nth_param(index as u32) + .unwrap() + .as_any_value_enum(), + ); + } + Node::Constant { id: cons_id } => { + values.insert(id, llvm_constants[cons_id.idx()].into()); + } + Node::DynamicConstant { id: dyn_cons_id } => { + values.insert(id, emit_dynamic_constant(dyn_cons_id)); + } + Node::Unary { input, op } => { + let input = values[&input]; + match op { + UnaryOperator::Not => { + values.insert( + id, + llvm_builder + .build_not(input.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + UnaryOperator::Neg => { + if input.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_neg(input.into_float_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_neg(input.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + } + } + } + Node::Binary { left, right, op } => { + let left = values[&left]; + let right = 
values[&right]; + match op { + BinaryOperator::Add => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_add( + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_add(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::Sub => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_sub( + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_sub(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::Mul => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_mul( + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_mul(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::Div => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_div( + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_unsigned_div( + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_signed_div( + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::Rem => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_rem( + 
left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_unsigned_rem( + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_signed_rem( + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::LT => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_compare( + FloatPredicate::OLT, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::ULT, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::SLT, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::LTE => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_compare( + FloatPredicate::OLE, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::ULE, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::SLE, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::GT => { + if left.get_type().is_float_type() { + values.insert( + id, 
+ llvm_builder + .build_float_compare( + FloatPredicate::OGT, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::UGT, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::SGT, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::GTE => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_compare( + FloatPredicate::OGE, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else if types[typing[id.idx()].idx()].is_unsigned() { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::UGE, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::SGE, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::EQ => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_compare( + FloatPredicate::OEQ, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::EQ, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::NE => { + if left.get_type().is_float_type() { + values.insert( + id, + llvm_builder + .build_float_compare( + FloatPredicate::ONE, + left.into_float_value(), + right.into_float_value(), + "", + ) + .unwrap() + 
.as_any_value_enum(), + ); + } else { + values.insert( + id, + llvm_builder + .build_int_compare( + IntPredicate::NE, + left.into_int_value(), + right.into_int_value(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + BinaryOperator::Or => { + values.insert( + id, + llvm_builder + .build_or(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + BinaryOperator::And => { + values.insert( + id, + llvm_builder + .build_and(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + BinaryOperator::Xor => { + values.insert( + id, + llvm_builder + .build_xor(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + BinaryOperator::LSh => { + values.insert( + id, + llvm_builder + .build_left_shift(left.into_int_value(), right.into_int_value(), "") + .unwrap() + .as_any_value_enum(), + ); + } + BinaryOperator::RSh => { + values.insert( + id, + llvm_builder + .build_right_shift( + left.into_int_value(), + right.into_int_value(), + !types[typing[id.idx()].idx()].is_unsigned(), + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + } + } + Node::ReadProd { prod, index } => { + // ReadProd nodes are special in that they may be projection nodes. 
+ if function.nodes[prod.idx()].is_strictly_control() { + let successor = def_use.get_users(id)[0]; + branch_instructions.insert( + llvm_bb, + llvm_builder + .build_unconditional_branch(llvm_bbs[&successor]) + .unwrap(), + ); + } else { + values.insert( + id, + llvm_builder + .build_extract_value(values[&prod].into_struct_value(), index as u32, "") + .unwrap() + .as_any_value_enum(), + ); + } + } + Node::WriteProd { prod, index, data } => { + values.insert( + id, + llvm_builder + .build_insert_value( + values[&prod].into_struct_value(), + BasicValueEnum::try_from(values[&data]).unwrap(), + index as u32, + "", + ) + .unwrap() + .as_any_value_enum(), + ); + } + Node::ReadArray { array, index } => { + let elem_type = element_type(typing[id.idx()], types); + let llvm_elem_type = llvm_types[elem_type.idx()]; + + // If this is the last level of the array type, then do a load. + // Otherwise, the output is a pointer to the sub-array. + if types[typing[id.idx()].idx()].is_array() { + let mut index = values[&index].into_int_value(); + for dc in type_extents(typing[id.idx()], types) { + let dc = emit_dynamic_constant(dc); + index = llvm_builder + .build_int_mul(index, dc.into_int_value(), "") + .unwrap(); + } + values.insert(id, unsafe { + llvm_builder + .build_gep( + llvm_elem_type, + values[&array].into_pointer_value(), + &[index], + "", + ) + .unwrap() + .as_any_value_enum() + }); + } else { + let gep_ptr = unsafe { + llvm_builder + .build_gep( + llvm_elem_type, + values[&array].into_pointer_value(), + &[values[&index].into_int_value()], + "", + ) + .unwrap() + }; + values.insert( + id, + llvm_builder + .build_load(llvm_elem_type, gep_ptr, "") + .unwrap() + .as_any_value_enum(), + ); + } + } + Node::WriteArray { + array: _, + index: _, + data: _, + } => todo!(), + _ => todo!(), + } +} diff --git a/hercules_codegen/src/gcm.rs b/hercules_codegen/src/gcm.rs index 132ce305e2ea1fe80a7e90e8197d4c22ac9b2500..a27f15b08bff875c51dfaafa6e6640ca20dfb820 100644 --- 
a/hercules_codegen/src/gcm.rs +++ b/hercules_codegen/src/gcm.rs @@ -21,18 +21,36 @@ pub fn gcm( control_subgraph: &Subgraph, dom: &DomTree, fork_join_map: &HashMap<NodeID, NodeID>, + antideps: &Vec<(NodeID, NodeID)>, ) -> Vec<NodeID> { // Step 1: find the immediate control uses and immediate control users of // each node. - let immediate_control_uses = + let mut immediate_control_uses = forward_dataflow(function, reverse_postorder, |inputs, node_id| { immediate_control_flow(inputs, node_id, function) }); - let immediate_control_users = + let mut immediate_control_users = backward_dataflow(function, def_use, reverse_postorder, |inputs, node_id| { immediate_control_flow(inputs, node_id, function) }); + // Reads and writes forming anti dependencies must be put in the same block. + for (read, write) in antideps { + let meet = UnionNodeSet::meet( + &immediate_control_uses[read.idx()], + &immediate_control_uses[write.idx()], + ); + immediate_control_uses[read.idx()] = meet.clone(); + immediate_control_uses[write.idx()] = meet; + + let meet = UnionNodeSet::meet( + &immediate_control_users[read.idx()], + &immediate_control_users[write.idx()], + ); + immediate_control_users[read.idx()] = meet.clone(); + immediate_control_users[write.idx()] = meet; + } + // Step 2: calculate loop tree of function. let loops = loops(&control_subgraph, NodeID::new(0), &dom, fork_join_map); @@ -69,3 +87,31 @@ pub fn gcm( bbs } + +/* + * Find fork/join nests that each control node is inside of. Result is a map + * from each control node to a list of fork nodes. The fork nodes are listed in + * ascending order of nesting. + */ +pub fn compute_fork_join_nesting( + function: &Function, + dom: &DomTree, + fork_join_map: &HashMap<NodeID, NodeID>, +) -> HashMap<NodeID, Vec<NodeID>> { + // For each control node, ascend dominator tree, looking for fork nodes. For + // each fork node, make sure each control node isn't strictly dominated by + // the corresponding join node. 
+ (0..function.nodes.len()) + .map(NodeID::new) + .filter(|id| function.is_control(*id)) + .map(|id| { + ( + id, + dom.ascend(id) + .filter(|id| function.nodes[id.idx()].is_fork()) + .filter(|fork_id| !dom.does_prop_dom(fork_join_map[&fork_id], id)) + .collect(), + ) + }) + .collect() +} diff --git a/hercules_codegen/src/lib.rs b/hercules_codegen/src/lib.rs index fd605651eed486718d591a1aa7d979f3e0c28ad5..7a1d6b6fda36262b4e4400c6b4f8f6730a017695 100644 --- a/hercules_codegen/src/lib.rs +++ b/hercules_codegen/src/lib.rs @@ -1,3 +1,9 @@ +pub mod antideps; +pub mod array_alloc; +pub mod cpu_alpha; pub mod gcm; +pub use crate::antideps::*; +pub use crate::array_alloc::*; +pub use crate::cpu_alpha::*; pub use crate::gcm::*; diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index b732db550c994bd673dfaa42335c33824dc965ba..8e11efacbde3521f4dc307ad8a132bca03668333 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -113,6 +113,14 @@ impl DomTree { } } + pub fn ascend<'a>(&'a self, bottom: NodeID) -> DomChainIterator<'a> { + DomChainIterator { + dom: self, + iter: Some(bottom), + top: self.root, + } + } + pub fn get_underlying_map(&self) -> &HashMap<NodeID, (u32, NodeID)> { &self.idom } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index e237240f6ed4b3f7baf64cd8eef29ef7ba74664d..af1555d6d85153cd08d1c39de5ce93ead1c356c7 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -1,6 +1,12 @@ +extern crate bitvec; extern crate ordered_float; use std::fmt::Write; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +use self::bitvec::prelude::*; use crate::*; @@ -18,6 +24,222 @@ pub struct Module { pub dynamic_constants: Vec<DynamicConstant>, } +/* + * A function has a name, a list of types for its parameters, a single return + * type, a list of nodes in its sea-of-nodes style IR, and a number of dynamic + * constants. 
When calling a function, arguments matching the parameter types + * are required, as well as the correct number of dynamic constants. All + * dynamic constants are 64-bit unsigned integers (usize / u64), so it is + * sufficient to merely store how many of them the function takes as arguments. + */ +#[derive(Debug, Clone)] +pub struct Function { + pub name: String, + pub param_types: Vec<TypeID>, + pub return_type: TypeID, + pub nodes: Vec<Node>, + pub num_dynamic_constants: u32, +} + +/* + * Hercules IR has a fairly standard type system, with the exception of the + * control type. Hercules IR is based off of the sea-of-nodes IR, the main + * feature of which being a merged control and data flow graph. Thus, control + * is a type of value, just like any other type. However, the type system is + * very restrictive over what can be done with control values. A novel addition + * in Hercules IR is that a control type is parameterized by a list of thread + * spawning factors. This is the mechanism in Hercules IR for representing + * parallelism. Summation types are an IR equivalent of Rust's enum types. + * These are lowered into tagged unions during scheduling. Array types are one- + * dimensional. Multi-dimensional arrays are represented by nesting array types. + * An array extent is represented with a dynamic constant. + */ +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Type { + Control(Box<[NodeID]>), + Boolean, + Integer8, + Integer16, + Integer32, + Integer64, + UnsignedInteger8, + UnsignedInteger16, + UnsignedInteger32, + UnsignedInteger64, + Float32, + Float64, + Product(Box<[TypeID]>), + Summation(Box<[TypeID]>), + Array(TypeID, DynamicConstantID), +} + +/* + * Constants are pretty standard in Hercules IR. Float constants used the + * ordered_float crate so that constants can be keys in maps (used for + * interning constants during IR construction). Product, summation, and array + * constants all contain their own type. 
This is only strictly necessary for + * summation types, but provides a nice mechanism for sanity checking for + * product and array types as well. + */ +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Constant { + Boolean(bool), + Integer8(i8), + Integer16(i16), + Integer32(i32), + Integer64(i64), + UnsignedInteger8(u8), + UnsignedInteger16(u16), + UnsignedInteger32(u32), + UnsignedInteger64(u64), + Float32(ordered_float::OrderedFloat<f32>), + Float64(ordered_float::OrderedFloat<f64>), + Product(TypeID, Box<[ConstantID]>), + Summation(TypeID, u32, ConstantID), + Array(TypeID, Box<[ConstantID]>), +} + +/* + * Dynamic constants are unsigned 64-bit integers passed to a Hercules function + * at runtime using the Hercules conductor API. They cannot be the result of + * computations in Hercules IR. For a single execution of a Hercules function, + * dynamic constants are constant throughout execution. This provides a + * mechanism by which Hercules functions can operate on arrays with variable + * length, while not needing Hercules functions to perform dynamic memory + * allocation - by providing dynamic constants to the conductor API, the + * conductor can allocate memory as necessary. + */ +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum DynamicConstant { + Constant(usize), + Parameter(usize), +} + +/* + * Hercules IR is a combination of a possibly cyclic control flow graph, and + * many possibly cyclic data flow graphs. Each node represents some operation on + * input values (including control), and produces some output value. Operations + * that conceptually produce multiple outputs (such as an if node) produce a + * product type instead. For example, the if node produces prod(control(N), + * control(N)), where the first control token represents the false branch, and + * the second control token represents the true branch. Functions are devoid of + * side effects, so call nodes don't take as input or output control tokens.
+ * There is also no global memory - use arrays. + */ +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Node { + Start, + Region { + preds: Box<[NodeID]>, + }, + If { + control: NodeID, + cond: NodeID, + }, + Fork { + control: NodeID, + factor: DynamicConstantID, + }, + Join { + control: NodeID, + }, + Phi { + control: NodeID, + data: Box<[NodeID]>, + }, + ThreadID { + control: NodeID, + }, + Collect { + control: NodeID, + data: NodeID, + }, + Return { + control: NodeID, + data: NodeID, + }, + Parameter { + index: usize, + }, + Constant { + id: ConstantID, + }, + DynamicConstant { + id: DynamicConstantID, + }, + Unary { + input: NodeID, + op: UnaryOperator, + }, + Binary { + left: NodeID, + right: NodeID, + op: BinaryOperator, + }, + Call { + function: FunctionID, + dynamic_constants: Box<[DynamicConstantID]>, + args: Box<[NodeID]>, + }, + ReadProd { + prod: NodeID, + index: usize, + }, + WriteProd { + prod: NodeID, + data: NodeID, + index: usize, + }, + ReadArray { + array: NodeID, + index: NodeID, + }, + WriteArray { + array: NodeID, + data: NodeID, + index: NodeID, + }, + Match { + control: NodeID, + sum: NodeID, + }, + BuildSum { + data: NodeID, + sum_ty: TypeID, + variant: usize, + }, + ExtractSum { + data: NodeID, + variant: usize, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum UnaryOperator { + Not, + Neg, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum BinaryOperator { + Add, + Sub, + Mul, + Div, + Rem, + LT, + LTE, + GT, + GTE, + EQ, + NE, + Or, + And, + Xor, + LSh, + RSh, +} + impl Module { /* * There are many transformations that need to iterate over the functions @@ -169,23 +391,146 @@ impl Module { Ok(()) } + + /* + * Create an iterator that traverses all the types in the module bottom up. + * This uses a coroutine to make iteratively traversing the type DAGs + * easier. 
+ */ + pub fn types_bottom_up(&self) -> impl Iterator<Item = TypeID> + '_ { + let types = &self.types; + let mut visited = bitvec![u8, Lsb0; 0; self.types.len()]; + let mut stack = (0..self.types.len()) + .map(TypeID::new) + .collect::<Vec<TypeID>>(); + let coroutine = move || { + // Since this is a coroutine, handle recursion manually. + while let Some(id) = stack.pop() { + if visited[id.idx()] { + continue; + } + match &types[id.idx()] { + Type::Product(children) | Type::Summation(children) => { + // We have to yield the children of this node before + // this node itself. We keep track of which nodes have + // yielded using visited. + let can_yield = children.iter().all(|x| visited[x.idx()]); + if can_yield { + visited.set(id.idx(), true); + yield id; + } else { + // Push ourselves, then children, so that children + // get popped first. + stack.push(id); + for id in children.iter() { + stack.push(*id); + } + } + } + Type::Array(child, _) => { + // Same idea as product / summation, but there's only + // one child. + let can_yield = visited[child.idx()]; + if can_yield { + visited.set(id.idx(), true); + yield id; + } else { + stack.push(id); + stack.push(*child); + } + } + _ => { + visited.set(id.idx(), true); + yield id; + } + } + } + }; + CoroutineIterator { + coroutine: Box::new(coroutine), + } + } + + /* + * Create an iterator that traverses all the constants in the module bottom up. + * This uses a coroutine to make iteratively traversing the constant DAGs + * easier. + */ + pub fn constants_bottom_up(&self) -> impl Iterator<Item = ConstantID> + '_ { + let constants = &self.constants; + let mut visited = bitvec![u8, Lsb0; 0; self.constants.len()]; + let mut stack = (0..self.constants.len()) + .map(ConstantID::new) + .collect::<Vec<ConstantID>>(); + let coroutine = move || { + // Since this is a coroutine, handle recursion manually. 
+ while let Some(id) = stack.pop() { + if visited[id.idx()] { + continue; + } + match &constants[id.idx()] { + Constant::Product(_, children) | Constant::Array(_, children) => { + // We have to yield the children of this node before + // this node itself. We keep track of which nodes have + // yielded using visited. + let can_yield = children.iter().all(|x| visited[x.idx()]); + if can_yield { + visited.set(id.idx(), true); + yield id; + } else { + // Push ourselves, then children, so that children + // get popped first. + stack.push(id); + for id in children.iter() { + stack.push(*id); + } + } + } + Constant::Summation(_, _, child) => { + // Same idea as product / array, but there's only + // one child. + let can_yield = visited[child.idx()]; + if can_yield { + visited.set(id.idx(), true); + yield id; + } else { + stack.push(id); + stack.push(*child); + } + } + _ => { + visited.set(id.idx(), true); + yield id; + } + } + } + }; + CoroutineIterator { + coroutine: Box::new(coroutine), + } + } } -/* - * A function has a name, a list of types for its parameters, a single return - * type, a list of nodes in its sea-of-nodes style IR, and a number of dynamic - * constants. When calling a function, arguments matching the parameter types - * are required, as well as the correct number of dynamic constants. All - * dynamic constants are 64-bit unsigned integers (usize / u64), so it is - * sufficient to merely store how many of them the function takes as arguments.
- */ -#[derive(Debug, Clone)] -pub struct Function { - pub name: String, - pub param_types: Vec<TypeID>, - pub return_type: TypeID, - pub nodes: Vec<Node>, - pub num_dynamic_constants: u32, +struct CoroutineIterator<G, I> +where + G: Coroutine<Yield = I, Return = ()> + Unpin, +{ + coroutine: G, +} + +impl<G, I> Iterator for CoroutineIterator<G, I> +where + G: Coroutine<Yield = I, Return = ()> + Unpin, +{ + type Item = I; + + fn next(&mut self) -> Option<Self::Item> { + // Iterator corresponds to yields from coroutine. + match Pin::new(&mut self.coroutine).resume(()) { + CoroutineState::Yielded(item) => Some(item), + CoroutineState::Complete(_) => None, + } + } } impl Function { @@ -241,38 +586,31 @@ impl Function { std::mem::swap(&mut new_nodes, &mut self.nodes); } -} -/* - * Hercules IR has a fairly standard type system, with the exception of the - * control type. Hercules IR is based off of the sea-of-nodes IR, the main - * feature of which being a merged control and data flow graph. Thus, control - * is a type of value, just like any other type. However, the type system is - * very restrictive over what can be done with control values. A novel addition - * in Hercules IR is that a control type is parameterized by a list of thread - * spawning factors. This is the mechanism in Hercules IR for representing - * parallelism. Summation types are an IR equivalent of Rust's enum types. - * These are lowered into tagged unions during scheduling. Array types are one- - * dimensional. Multi-dimensional arrays are represented by nesting array types. - * An array extent is represented with a dynamic constant. 
- */ -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Type { - Control(Box<[NodeID]>), - Boolean, - Integer8, - Integer16, - Integer32, - Integer64, - UnsignedInteger8, - UnsignedInteger16, - UnsignedInteger32, - UnsignedInteger64, - Float32, - Float64, - Product(Box<[TypeID]>), - Summation(Box<[TypeID]>), - Array(TypeID, DynamicConstantID), + /* + * Checking if a node is control requires surrounding context, so this is a + * member of Function, not Node. Returns true if the node is strictly + * control on its own, or if it is a ReadProd whose `prod` input is a Match + * or If node (i.e. a control projection). Panics if a node ID is out of + * bounds for `nodes`. + */ + pub fn is_control(&self, id: NodeID) -> bool { + if self.nodes[id.idx()].is_strictly_control() { + return true; + } + + // ReadProd nodes are control nodes if their predecessor is a legal + // control node. + if let Node::ReadProd { prod, index: _ } = self.nodes[id.idx()] { + return matches!( + self.nodes[prod.idx()], + Node::Match { control: _, sum: _ } + | Node::If { + control: _, + cond: _, + } + ); + } + + false + } } impl Type { @@ -327,32 +665,30 @@ impl Type { pub fn is_primitive(&self) -> bool { self.is_bool() || self.is_fixed() || self.is_float() } + + /* + * Returns true exactly when this type is an Array, regardless of its + * element type or extent. + */ + pub fn is_array(&self) -> bool { + matches!(self, Type::Array(_, _)) + } } -/* - * Constants are pretty standard in Hercules IR. Float constants used the - * ordered_float crate so that constants can be keys in maps (used for - * interning constants during IR construction). Product, summation, and array - * constants all contain their own type. This is only strictly necessary for - * summation types, but provides a nice mechanism for sanity checking for - * product and array types as well.
- */ -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Constant { - Boolean(bool), - Integer8(i8), - Integer16(i16), - Integer32(i32), - Integer64(i64), - UnsignedInteger8(u8), - UnsignedInteger16(u16), - UnsignedInteger32(u32), - UnsignedInteger64(u64), - Float32(ordered_float::OrderedFloat<f32>), - Float64(ordered_float::OrderedFloat<f64>), - Product(TypeID, Box<[ConstantID]>), - Summation(TypeID, u32, ConstantID), - Array(TypeID, Box<[ConstantID]>), +/* + * Collect the extents of a (possibly nested) array type, outermost dimension + * first. Starting at `ty`, each Array level contributes its dynamic constant + * extent, and the walk then continues into that level's element type. Returns + * an empty vector if `ty` is not an array. Panics if a type ID is out of + * bounds for `types`. + */ +pub fn type_extents(mut ty: TypeID, types: &[Type]) -> Vec<DynamicConstantID> { + let mut extents = vec![]; + while let Type::Array(elem, dc) = types[ty.idx()] { + extents.push(dc); + ty = elem; + } + extents +} + +/* + * Strip every Array level off of `ty` and return the ID of the innermost, + * non-array element type. Returns `ty` itself if it is not an array. Panics + * if a type ID is out of bounds for `types`. + */ +pub fn element_type(mut ty: TypeID, types: &[Type]) -> TypeID { + while let Type::Array(elem, _) = types[ty.idx()] { + ty = elem; + } + ty } impl Constant { @@ -392,152 +728,9 @@ impl Constant { } } -/* - * Dynamic constants are unsigned 64-bit integers passed to a Hercules function - * at runtime using the Hercules conductor API. They cannot be the result of - * computations in Hercules IR. For a single execution of a Hercules function, - * dynamic constants are constant throughout execution. This provides a - * mechanism by which Hercules functions can operate on arrays with variable - * length, while not needing Hercules functions to perform dynamic memory - * allocation - by providing dynamic constants to the conductor API, the - * conductor can allocate memory as necessary. - */ -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum DynamicConstant { - Constant(usize), - Parameter(usize), -} - -/* - * Hercules IR is a combination of a possibly cylic control flow graph, and - * many possibly cyclic data flow graphs. Each node represents some operation on - * input values (including control), and produces some output value. Operations - * that conceptually produce multiple outputs (such as an if node) produce a - * product type instead.
For example, the if node produces prod(control(N), - * control(N)), where the first control token represents the false branch, and - * the second control token represents the true branch. Functions are devoid of - * side effects, so call nodes don't take as input or output control tokens. - * There is also no global memory - use arrays. - */ -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Node { - Start, - Region { - preds: Box<[NodeID]>, - }, - If { - control: NodeID, - cond: NodeID, - }, - Fork { - control: NodeID, - factor: DynamicConstantID, - }, - Join { - control: NodeID, - }, - Phi { - control: NodeID, - data: Box<[NodeID]>, - }, - ThreadID { - control: NodeID, - }, - Collect { - control: NodeID, - data: NodeID, - }, - Return { - control: NodeID, - data: NodeID, - }, - Parameter { - index: usize, - }, - Constant { - id: ConstantID, - }, - DynamicConstant { - id: DynamicConstantID, - }, - Unary { - input: NodeID, - op: UnaryOperator, - }, - Binary { - left: NodeID, - right: NodeID, - op: BinaryOperator, - }, - Call { - function: FunctionID, - dynamic_constants: Box<[DynamicConstantID]>, - args: Box<[NodeID]>, - }, - ReadProd { - prod: NodeID, - index: usize, - }, - WriteProd { - prod: NodeID, - data: NodeID, - index: usize, - }, - ReadArray { - array: NodeID, - index: NodeID, - }, - WriteArray { - array: NodeID, - data: NodeID, - index: NodeID, - }, - Match { - control: NodeID, - sum: NodeID, - }, - BuildSum { - data: NodeID, - sum_ty: TypeID, - variant: usize, - }, - ExtractSum { - data: NodeID, - variant: usize, - }, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum UnaryOperator { - Not, - Neg, - Bitflip, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum BinaryOperator { - Add, - Sub, - Mul, - Div, - Rem, - LT, - LTE, - GT, - GTE, - EQ, - NE, - Or, - And, - Xor, - LSh, - RSh, -} - /* * Simple predicate functions on nodes take a lot of space, so use a macro. */ - macro_rules! 
define_pattern_predicate { ($x: ident, $y: pat) => { pub fn $x(&self) -> bool { @@ -590,6 +783,24 @@ impl Node { data: _, } ); + define_pattern_predicate!(is_read_prod, Node::ReadProd { prod: _, index: _ }); + define_pattern_predicate!( + is_write_prod, + Node::WriteProd { + prod: _, + index: _, + data: _ + } + ); + define_pattern_predicate!(is_read_array, Node::ReadArray { array: _, index: _ }); + define_pattern_predicate!( + is_write_array, + Node::WriteArray { + array: _, + index: _, + data: _ + } + ); define_pattern_predicate!(is_match, Node::Match { control: _, sum: _ }); /* @@ -743,7 +954,6 @@ impl UnaryOperator { match self { UnaryOperator::Not => "Not", UnaryOperator::Neg => "Neg", - UnaryOperator::Bitflip => "Bitflip", } } @@ -751,7 +961,6 @@ impl UnaryOperator { match self { UnaryOperator::Not => "not", UnaryOperator::Neg => "neg", - UnaryOperator::Bitflip => "bitflip", } } } @@ -803,67 +1012,26 @@ impl BinaryOperator { /* * Rust things to make newtyped IDs usable. */ -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct FunctionID(u32); - -impl FunctionID { - pub fn new(x: usize) -> Self { - FunctionID(x as u32) - } - - pub fn idx(&self) -> usize { - self.0 as usize - } -} -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct NodeID(u32); - -impl NodeID { - pub fn new(x: usize) -> Self { - NodeID(x as u32) - } - - pub fn idx(&self) -> usize { - self.0 as usize - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct ConstantID(u32); - -impl ConstantID { - pub fn new(x: usize) -> Self { - ConstantID(x as u32) - } +macro_rules! 
define_id_type { + ($x: ident) => { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] + pub struct $x(u32); - pub fn idx(&self) -> usize { - self.0 as usize - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct TypeID(u32); - -impl TypeID { - pub fn new(x: usize) -> Self { - TypeID(x as u32) - } + impl $x { + pub fn new(x: usize) -> Self { + $x(x as u32) + } - pub fn idx(&self) -> usize { - self.0 as usize - } + pub fn idx(&self) -> usize { + self.0 as usize + } + } + }; } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct DynamicConstantID(u32); - -impl DynamicConstantID { - pub fn new(x: usize) -> Self { - DynamicConstantID(x as u32) - } - - pub fn idx(&self) -> usize { - self.0 as usize - } -} +define_id_type!(FunctionID); +define_id_type!(NodeID); +define_id_type!(TypeID); +define_id_type!(ConstantID); +define_id_type!(DynamicConstantID); diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 6d4364cd5518321e0d3e41d91bccad2e840166ab..9606c0b265c28f1769794619426b9accf66ca992 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(coroutines, coroutine_trait)] + pub mod build; pub mod dataflow; pub mod def_use; diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 2373332e102ba4fd254dff14641f1b16e333d864..0900d49ff5438725ed0eaf0af9366373521c58f8 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -291,7 +291,6 @@ fn parse_node<'a>( // parse them into Unary or Binary node kinds. 
"not" => parse_unary(ir_text, context, UnaryOperator::Not)?, "neg" => parse_unary(ir_text, context, UnaryOperator::Neg)?, - "bitflip" => parse_unary(ir_text, context, UnaryOperator::Bitflip)?, "add" => parse_binary(ir_text, context, BinaryOperator::Add)?, "sub" => parse_binary(ir_text, context, BinaryOperator::Sub)?, "mul" => parse_binary(ir_text, context, BinaryOperator::Mul)?, diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index 290abd7475c4af18e39f1f42f634c9d63f13f091..c4829b60c488a9b5c8032c90229eb3851c0eb298 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -170,7 +170,7 @@ impl Subgraph { */ pub fn subgraph<F>(function: &Function, def_use: &ImmutableDefUseMap, predicate: F) -> Subgraph where - F: Fn(&Node) -> bool, + F: Fn(NodeID) -> bool, { let mut subgraph = Subgraph { nodes: vec![], @@ -183,12 +183,12 @@ where }; // Step 1: collect predicated nodes. - for (idx, node) in function.nodes.iter().enumerate() { - if predicate(node) { + for id in (0..function.nodes.len()).map(NodeID::new) { + if predicate(id) { subgraph .node_numbers - .insert(NodeID::new(idx), subgraph.nodes.len() as u32); - subgraph.nodes.push(NodeID::new(idx)); + .insert(id, subgraph.nodes.len() as u32); + subgraph.nodes.push(id); } } @@ -235,35 +235,5 @@ where * Get the control subgraph of a function. */ pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Subgraph { - use Node::*; - - subgraph(function, def_use, |node| match node { - Start - | Region { preds: _ } - | If { - control: _, - cond: _, - } - | Fork { - control: _, - factor: _, - } - | Join { control: _ } - | Return { - control: _, - data: _, - } - | Match { control: _, sum: _ } => true, - ReadProd { prod, index: _ } => match function.nodes[prod.idx()] { - // ReadProd nodes are control nodes if their predecessor is a - // legal control node. 
- Match { control: _, sum: _ } - | If { - control: _, - cond: _, - } => true, - _ => false, - }, - _ => false, - }) + subgraph(function, def_use, |node| function.is_control(node)) } diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 09f4cbb73a705796dd69509bcdbd847507e366a5..ad53283d4e5e0b67b697b1e895426d940217df88 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -600,9 +600,9 @@ fn typeflow( if let Concrete(id) = inputs[0] { match op { UnaryOperator::Not => { - if !types[id.idx()].is_bool() { + if !types[id.idx()].is_bool() && !types[id.idx()].is_fixed() { return Error(String::from( - "Not unary node input cannot have non-boolean type.", + "Not unary node input cannot have non-bool and non-fixed type.", )); } } @@ -618,13 +618,6 @@ fn typeflow( )); } } - UnaryOperator::Bitflip => { - if !types[id.idx()].is_fixed() { - return Error(String::from( - "Bitflip unary node input cannot have non-fixed type.", - )); - } - } } } diff --git a/hercules_opt/src/ccp.rs b/hercules_opt/src/ccp.rs index fc5bc2166fce93c3213f3d1ef7265971fad68b51..2993bf8762a479609cc8ff161f592bec8d2e97c5 100644 --- a/hercules_opt/src/ccp.rs +++ b/hercules_opt/src/ccp.rs @@ -442,16 +442,16 @@ fn ccp_flow_function( let new_constant = if let ConstantLattice::Constant(cons) = constant { let new_cons = match (op, cons) { (UnaryOperator::Not, Constant::Boolean(val)) => Constant::Boolean(!val), + (UnaryOperator::Not, Constant::Integer8(val)) => Constant::Integer8(!val), + (UnaryOperator::Not, Constant::Integer16(val)) => Constant::Integer16(!val), + (UnaryOperator::Not, Constant::Integer32(val)) => Constant::Integer32(!val), + (UnaryOperator::Not, Constant::Integer64(val)) => Constant::Integer64(!val), (UnaryOperator::Neg, Constant::Integer8(val)) => Constant::Integer8(-val), (UnaryOperator::Neg, Constant::Integer16(val)) => Constant::Integer16(-val), (UnaryOperator::Neg, Constant::Integer32(val)) => Constant::Integer32(-val), (UnaryOperator::Neg, 
Constant::Integer64(val)) => Constant::Integer64(-val), (UnaryOperator::Neg, Constant::Float32(val)) => Constant::Float32(-val), (UnaryOperator::Neg, Constant::Float64(val)) => Constant::Float64(-val), - (UnaryOperator::Bitflip, Constant::Integer8(val)) => Constant::Integer8(!val), - (UnaryOperator::Bitflip, Constant::Integer16(val)) => Constant::Integer16(!val), - (UnaryOperator::Bitflip, Constant::Integer32(val)) => Constant::Integer32(!val), - (UnaryOperator::Bitflip, Constant::Integer64(val)) => Constant::Integer64(!val), _ => panic!("Unsupported combination of unary operation and constant value. Did typechecking succeed?") }; ConstantLattice::Constant(new_cons) diff --git a/hercules_tools/src/hercules_cpu/main.rs b/hercules_tools/src/hercules_cpu/main.rs index bb0a49963f2b0422aa97489addf1bcbefa241c16..9ea218afa729c60372a78baa4d02d15385c897a8 100644 --- a/hercules_tools/src/hercules_cpu/main.rs +++ b/hercules_tools/src/hercules_cpu/main.rs @@ -49,10 +49,17 @@ fn main() { (function, (types, constants, dynamic_constants)) }, ); - let (def_uses, reverse_postorders, _typing, subgraphs, doms, _postdoms, fork_join_maps) = + let (def_uses, reverse_postorders, typing, subgraphs, doms, _postdoms, fork_join_maps) = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to verify Hercules IR module."); + let antideps: Vec<_> = module + .functions + .iter() + .enumerate() + .map(|(idx, function)| hercules_codegen::antideps::antideps(function, &def_uses[idx])) + .collect(); + let bbs: Vec<_> = module .functions .iter() @@ -65,13 +72,49 @@ fn main() { &subgraphs[idx], &doms[idx], &fork_join_maps[idx], + &antideps[idx], ) - .iter() - .map(|id| id.idx()) - .enumerate() - .collect::<Vec<_>>() }) .collect(); - println!("{:?}", bbs); + let fork_join_nests: Vec<_> = module + .functions + .iter() + .enumerate() + .map(|(idx, function)| { + hercules_codegen::gcm::compute_fork_join_nesting( + function, + &doms[idx], + &fork_join_maps[idx], + ) + }) + .collect(); + + let 
array_allocs: Vec<_> = module + .functions + .iter() + .enumerate() + .map(|(idx, function)| { + hercules_codegen::array_alloc::logical_array_alloc( + function, + &typing[idx], + &module.types, + &fork_join_maps[idx], + &bbs[idx], + &fork_join_nests[idx], + ) + }) + .collect(); + + hercules_codegen::cpu_alpha::cpu_alpha_codegen( + &module, + &typing, + &reverse_postorders, + &def_uses, + &bbs, + &antideps, + &array_allocs, + &fork_join_nests, + &std::path::Path::new("test.bc"), + ); } diff --git a/hercules_tools/src/hercules_dot/dot.rs b/hercules_tools/src/hercules_dot/dot.rs index f71a40f18fbae210d1111e19e9094a9f5f12dd97..5ccd6363bafcb59b734923ca873d60f0b70ca1b7 100644 --- a/hercules_tools/src/hercules_dot/dot.rs +++ b/hercules_tools/src/hercules_dot/dot.rs @@ -11,6 +11,7 @@ use self::hercules_ir::*; */ pub fn write_dot<W: Write>( module: &ir::Module, + reverse_postorders: &Vec<Vec<NodeID>>, typing: &ModuleTyping, doms: &Vec<DomTree>, fork_join_maps: &Vec<HashMap<NodeID, NodeID>>, @@ -20,6 +21,11 @@ pub fn write_dot<W: Write>( for function_id in (0..module.functions.len()).map(FunctionID::new) { let function = &module.functions[function_id.idx()]; + let reverse_postorder = &reverse_postorders[function_id.idx()]; + let mut reverse_postorder_node_numbers = vec![0; function.nodes.len()]; + for (idx, id) in reverse_postorder.iter().enumerate() { + reverse_postorder_node_numbers[id.idx()] = idx; + } write_subgraph_header(function_id, module, w)?; // Step 1: draw IR graph itself. This includes all IR nodes and all edges @@ -51,11 +57,24 @@ pub fn write_dot<W: Write>( "dotted" }; + // To have a consistent layout, we will add "back edges" in the + // IR graph as backward facing edges in the graphviz output, so + // that they don't mess up the layout. There isn't necessarily a + // precise definition of a "back edge" in Hercules IR. 
I've + // found what makes for the most clear output graphs is treating + // edges to phi nodes as back edges when the phi node appears + // before the use in the reverse postorder, and treating a + // control edge a back edge when the destination appears before + // the source in the reverse postorder. + let is_back_edge = reverse_postorder_node_numbers[node_id.idx()] + < reverse_postorder_node_numbers[u.idx()] + && (node.is_phi() || (function.is_control(node_id) && function.is_control(*u))); write_edge( node_id, function_id, *u, function_id, + !is_back_edge, "black", style, module, @@ -73,6 +92,7 @@ pub fn write_dot<W: Write>( function_id, *parent_id, function_id, + true, "darkgreen", "dotted", &module, @@ -88,6 +108,7 @@ pub fn write_dot<W: Write>( function_id, *fork_id, function_id, + true, "darkmagenta", "dotted", &module, @@ -204,6 +225,7 @@ fn write_edge<W: Write>( dst_function_id: FunctionID, src_node_id: NodeID, src_function_id: FunctionID, + forward: bool, color: &str, style: &str, module: &Module, @@ -211,17 +233,32 @@ fn write_edge<W: Write>( ) -> std::fmt::Result { let dst_node = &module.functions[dst_function_id.idx()].nodes[dst_node_id.idx()]; let src_node = &module.functions[src_function_id.idx()].nodes[src_node_id.idx()]; - write!( - w, - "{}_{}_{} -> {}_{}_{} [color={}, style=\"{}\"];\n", - src_node.lower_case_name(), - src_function_id.idx(), - src_node_id.idx(), - dst_node.lower_case_name(), - dst_function_id.idx(), - dst_node_id.idx(), - color, - style, - )?; + if forward { + write!( + w, + "{}_{}_{} -> {}_{}_{} [color={}, style=\"{}\"];\n", + src_node.lower_case_name(), + src_function_id.idx(), + src_node_id.idx(), + dst_node.lower_case_name(), + dst_function_id.idx(), + dst_node_id.idx(), + color, + style, + )?; + } else { + write!( + w, + "{}_{}_{} -> {}_{}_{} [dir=back, color={}, style=\"{}\"];\n", + dst_node.lower_case_name(), + dst_function_id.idx(), + dst_node_id.idx(), + src_node.lower_case_name(), + src_function_id.idx(), + 
src_node_id.idx(), + color, + style, + )?; + } Ok(()) } diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index e3543fe5b6e860a4456ea20fe60a2529ea2031d3..a24285ebddb2928a5e6e051bc795959248c7b0df 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -57,7 +57,7 @@ fn main() { (function, (types, constants, dynamic_constants)) }, ); - let (_def_uses, _reverse_postorders, typing, _subgraphs, doms, _postdoms, fork_join_maps) = + let (_def_uses, reverse_postorders, typing, _subgraphs, doms, _postdoms, fork_join_maps) = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to verify Hercules IR module."); @@ -68,8 +68,15 @@ fn main() { tmp_path.push(format!("hercules_dot_{}.dot", num)); let mut file = File::create(tmp_path.clone()).expect("PANIC: Unable to open output file."); let mut contents = String::new(); - write_dot(&module, &typing, &doms, &fork_join_maps, &mut contents) - .expect("PANIC: Unable to generate output file contents."); + write_dot( + &module, + &reverse_postorders, + &typing, + &doms, + &fork_join_maps, + &mut contents, + ) + .expect("PANIC: Unable to generate output file contents."); file.write_all(contents.as_bytes()) .expect("PANIC: Unable to write output file contents."); Command::new("xdot") @@ -79,8 +86,15 @@ fn main() { } else { let mut file = File::create(args.output).expect("PANIC: Unable to open output file."); let mut contents = String::new(); - write_dot(&module, &typing, &doms, &fork_join_maps, &mut contents) - .expect("PANIC: Unable to generate output file contents."); + write_dot( + &module, + &reverse_postorders, + &typing, + &doms, + &fork_join_maps, + &mut contents, + ) + .expect("PANIC: Unable to generate output file contents."); file.write_all(contents.as_bytes()) .expect("PANIC: Unable to write output file contents."); } diff --git a/samples/fork_join.hir b/samples/fork_join.hir new file mode 100644 index 
0000000000000000000000000000000000000000..e8371c62d4f9be3607d0be4120232aa198b5d760 --- /dev/null +++ b/samples/fork_join.hir @@ -0,0 +1,9 @@ +fn fork_join<2>() -> array(array(u64, #1), #0) + af_ctrl = fork(start, #0) + bf_ctrl = fork(af_ctrl, #1) + bj_ctrl = join(bf_ctrl) + aj_ctrl = join(bj_ctrl) + x = thread_id(bf_ctrl) + bdata = collect(bj_ctrl, x) + adata = collect(aj_ctrl, bdata) + r = return(aj_ctrl, adata) diff --git a/samples/strset.hir b/samples/strset.hir new file mode 100644 index 0000000000000000000000000000000000000000..b83ff39e288139e633e68d2670cdd79b0cf2b64c --- /dev/null +++ b/samples/strset.hir @@ -0,0 +1,17 @@ +fn strset<1>(str: array(u8, #0), byte: u8) -> array(u8, #0) + zero = constant(u64, 0) + one = constant(u64, 1) + bound = dynamic_constant(#0) + loop = region(start, if_true) + idx = phi(loop, zero, idx_inc) + str_inc = phi(loop, str, write) + idx_inc = add(idx, one) + in_bounds = lt(idx_inc, bound) + read = read_array(str_inc, idx) + write = write_array(str_inc, byte, idx) + continue = ne(read, byte) + if_cond = and(continue, in_bounds) + if = if(loop, if_cond) + if_false = read_prod(if, 0) + if_true = read_prod(if, 1) + r = return(if_false, str_inc) \ No newline at end of file