diff --git a/hercules_cg/Cargo.toml b/hercules_cg/Cargo.toml deleted file mode 100644 index 9464153078b92bc8a9ee6f4c5535c4e435395b2a..0000000000000000000000000000000000000000 --- a/hercules_cg/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "hercules_cg" -version = "0.1.0" -authors = ["Russel Arbore <rarbore2@illinois.edu>"] - -[dependencies] -bitvec = "*" -hercules_ir = { path = "../hercules_ir" } diff --git a/hercules_cg/src/common.rs b/hercules_cg/src/common.rs deleted file mode 100644 index 45061aad0ff7c0885756ae8faad2df2550a9d9b8..0000000000000000000000000000000000000000 --- a/hercules_cg/src/common.rs +++ /dev/null @@ -1,738 +0,0 @@ -extern crate hercules_ir; - -use std::collections::HashMap; -use std::iter::repeat; - -use self::hercules_ir::*; - -/* - * Pretty much all of the codegen functions need to take in some large subset of - * IR structures, analysis results, and global pieces of information. Package - * them all in this struct, and make all the codegen functions members of this - * struct to cut down on the number of function arguments. This structure - * shouldn't be modified after creation. 
- */ -pub(crate) struct FunctionContext<'a> { - pub(crate) function: &'a Function, - pub(crate) types: &'a Vec<Type>, - pub(crate) constants: &'a Vec<Constant>, - pub(crate) dynamic_constants: &'a Vec<DynamicConstant>, - pub(crate) def_use: &'a ImmutableDefUseMap, - pub(crate) reverse_postorder: &'a Vec<NodeID>, - pub(crate) typing: &'a Vec<TypeID>, - pub(crate) control_subgraph: &'a Subgraph, - pub(crate) fork_join_map: &'a HashMap<NodeID, NodeID>, - pub(crate) fork_join_nest: &'a HashMap<NodeID, Vec<NodeID>>, - pub(crate) antideps: &'a Vec<(NodeID, NodeID)>, - pub(crate) bbs: &'a Vec<NodeID>, - pub(crate) plan: &'a Plan, - pub(crate) llvm_types: &'a Vec<String>, - pub(crate) llvm_constants: &'a Vec<String>, - pub(crate) llvm_dynamic_constants: &'a Vec<String>, - pub(crate) type_sizes_aligns: &'a Vec<(Option<usize>, usize)>, - pub(crate) partitions_inverted_map: Vec<Vec<NodeID>>, -} - -impl<'a> FunctionContext<'a> { - /* - * Find data inputs to a partition. - */ - pub(crate) fn partition_data_inputs(&self, partition_id: PartitionID) -> Vec<NodeID> { - let partition = &self.partitions_inverted_map[partition_id.idx()]; - - let mut data_inputs: Vec<NodeID> = partition - .iter() - .map(|id| { - // For each node in the partition, filter out the uses that are - // data nodes and are in a different partition. - get_uses(&self.function.nodes[id.idx()]) - .as_ref() - .into_iter() - .filter(|id| { - // Filter out control nodes (just looking for data - // inputs here), check that it's in another partition, - // and ignore parameters, constants, and dynamic - // constants (those are each passed to partition - // functions using different mechanisms). 
- !self.function.nodes[id.idx()].is_control() - && self.plan.partitions[id.idx()] != partition_id - && !self.function.nodes[id.idx()].is_parameter() - && !self.function.nodes[id.idx()].is_constant() - && !self.function.nodes[id.idx()].is_dynamic_constant() - }) - .map(|x| *x) - .collect::<Vec<NodeID>>() - }) - // Collect all such uses across the whole partition. - .flatten() - .collect(); - - // Inputs and outputs of partitions need to be sorted so datums don't - // get mixed up. - data_inputs.sort(); - data_inputs - } - - /* - * Find data outputs of a partition. - */ - pub(crate) fn partition_data_outputs(&self, partition_id: PartitionID) -> Vec<NodeID> { - let partition = &self.partitions_inverted_map[partition_id.idx()]; - - let mut data_outputs: Vec<NodeID> = partition - .iter() - .filter(|id| { - // For each data node in the partition, check if it has any uses - // outside its partition. Users can be control or data nodes. - // Also, don't add parameter, constant, and dynamic constant - // nodes. These nodes are passed to partition mechanisms using - // different mechanism. - !self.function.nodes[id.idx()].is_control() - && !self.function.nodes[id.idx()].is_parameter() - && !self.function.nodes[id.idx()].is_constant() - && !self.function.nodes[id.idx()].is_dynamic_constant() - && self - .def_use - .get_users(**id) - .as_ref() - .into_iter() - .filter(|id| self.plan.partitions[id.idx()] != partition_id) - .map(|x| *x) - .count() - > 0 - }) - .map(|x| *x) - // If this partition contains a return node, the data input of that - // node is a data output. - .chain(partition.iter().filter_map(|id| { - if let Node::Return { control: _, data } = self.function.nodes[id.idx()] { - Some(data) - } else { - None - } - })) - .collect(); - - // Inputs and outputs of partitions need to be sorted so datums don't - // get mixed up. - data_outputs.sort(); - data_outputs - } - - /* - * Find control nodes that will return from a partition. 
- */ - pub(crate) fn partition_control_returns(&self, partition_id: PartitionID) -> Vec<NodeID> { - let partition = &self.partitions_inverted_map[partition_id.idx()]; - - partition - .iter() - .filter(|id| { - // For each control node in the partition, check if it has any - // users outside its partition. Users can be control nodes - if - // a user in a different partition is a data node, then the - // partition is malformed. Return nodes are also unconditionally - // a control return of this partition. - let outside_user_count = self - .def_use - .get_users(**id) - .as_ref() - .into_iter() - .filter(|user_id| { - // Users of control nodes can only be data nodes - // if they are in the same partition as the - // control node. Only control users may be in a - // different partition. - assert!( - !self.function.nodes[id.idx()].is_control() - || self.function.nodes[user_id.idx()].is_control() - || self.plan.partitions[user_id.idx()] == partition_id - ); - self.plan.partitions[user_id.idx()] != partition_id - }) - .count(); - - // Just calculated for the below assert. - let control_user_count = self - .def_use - .get_users(**id) - .as_ref() - .into_iter() - .filter(|id| self.function.nodes[id.idx()].is_control()) - .count(); - - // A control node cannot have users inside and outside its own - // partition. This is because a well-formedness condition of if - // and match nodes (the only control nodes allowed to have - // multiple users) is their read successors must be in the same - // partition as them. - assert!( - !self.function.nodes[id.idx()].is_control() - || outside_user_count == 0 - || outside_user_count == control_user_count - ); - self.function.nodes[id.idx()].is_control() - && (self.function.nodes[id.idx()].is_return() || outside_user_count > 0) - }) - .map(|x| *x) - .collect() - } - - /* - * Find control successors of a given partition. 
A partition cannot be a - * control successor of itself, since a self-cycle is represented as control - * flow within a partiion. In other words, the graph of control flow between - * partitions is free of self-loops (an edge connecting a partition to - * itself). - */ - pub(crate) fn partition_control_successors( - &self, - partition_id: PartitionID, - ) -> Vec<(PartitionID, NodeID)> { - let partition = &self.partitions_inverted_map[partition_id.idx()]; - - partition - .iter() - // Only consider nodes in other partitions that are successors of - // control nodes. These are necessarily other control nodes. - .filter(|id| self.function.nodes[id.idx()].is_control()) - .map(|id| { - // Get the partitions (that are not this partition) of successor - // nodes of control nodes. - self.def_use - .get_users(*id) - .as_ref() - .into_iter() - .map(|id| self.plan.partitions[id.idx()]) - .filter(|id| *id != partition_id) - .map(move |part_id| (part_id, *id)) - }) - // We want a flat list of all such partitions. - .flatten() - .collect() - } - - /* - * Calculate the reverse postorder of just this partition. - */ - pub(crate) fn partition_reverse_postorder(&self, partition_id: PartitionID) -> Vec<NodeID> { - self.reverse_postorder - .iter() - .filter(|id| self.plan.partitions[id.idx()] == partition_id) - .map(|x| *x) - .collect() - } - - /* - * Determine the array constant inputs to all partition functions. Get the - * constant IDs, and the array type IDs. Sort by constant ID for - * consistency. - */ - pub(crate) fn partition_array_constant_inputs(&self) -> Vec<(ConstantID, TypeID)> { - let mut res = (0..self.constants.len()) - .filter_map(|idx| { - self.constants[idx] - .try_array_type(self.types) - .map(|ty_id| (ConstantID::new(idx), ty_id)) - }) - .collect::<Vec<_>>(); - - res.sort(); - res - } - - /* - * Determine the dynamic constant inputs to all partition functions. Just - * assemble the dynamic constant IDs, since the type is always u64. 
Sort the - * parameters for consistency. - */ - pub(crate) fn partition_dynamic_constant_inputs(&self) -> Vec<DynamicConstantID> { - let mut res = (0..self.dynamic_constants.len()) - .filter_map(|idx| { - if self.dynamic_constants[idx].is_parameter() { - Some(DynamicConstantID::new(idx)) - } else { - None - } - }) - .collect::<Vec<_>>(); - - res.sort(); - res - } - - /* - * Determine the array numbers for all the array constants. These are needed - * to know which pointer passed to the runtime corresponds to which array - * constant. Return a map from constant ID to array number - non-array - * constants don't have an array number. - */ - pub(crate) fn array_constant_inputs(&self) -> Vec<Option<u32>> { - self.constants - .iter() - .scan(0, |num, cons| { - if cons.try_array_type(self.types).is_some() { - let res = Some(*num); - *num += 1; - Some(res) - } else { - Some(None) - } - }) - .collect() - } -} - -/* - * When emitting individual nodes in the partition codegen functions, a bunch of - * partition analysis results are needed. Package them all in this struct, and - * make all of the subroutines of the top level partition codegen functions - * members of this struct to cut down on the number of function arguments. This - * structure shouldn't be modified after creation. This structure only holds per - * partition specific information - for example, global function parameters, - * constant parameters, and dynamic constant parameters are not stored, since - * those don't vary across partitions. 
- */ -pub(crate) struct PartitionContext<'a> { - pub(crate) function: &'a FunctionContext<'a>, - pub(crate) partition_id: PartitionID, - pub(crate) top_node: NodeID, - pub(crate) data_inputs: Vec<NodeID>, - pub(crate) data_outputs: Vec<NodeID>, - pub(crate) control_returns: Vec<NodeID>, - pub(crate) reverse_postorder: Vec<NodeID>, - pub(crate) partition_input_types: Vec<TypeID>, - pub(crate) return_type: Type, - pub(crate) manifest: PartitionManifest, -} - -impl<'a> PartitionContext<'a> { - pub(crate) fn new( - function: &'a FunctionContext<'a>, - partition_id: PartitionID, - top_node: NodeID, - ) -> Self { - let data_inputs = function.partition_data_inputs(partition_id); - let data_outputs = function.partition_data_outputs(partition_id); - let control_returns = function.partition_control_returns(partition_id); - let control_successors = function.partition_control_successors(partition_id); - let reverse_postorder = function.partition_reverse_postorder(partition_id); - - // The data input types are just the types of data nodes used by this - // partition, originating in another partition. - let partition_input_types = data_inputs - .iter() - .map(|id| function.typing[id.idx()]) - .collect(); - - // The return struct contains all of the data outputs, plus control - // information if there are multiple successor partitions. The control - // information is used by the Hercules runtime to implement control flow - // between partitions. - let multiple_control_successors = control_successors.len() > 1; - let output_data_types = data_outputs.iter().map(|id| function.typing[id.idx()]); - let return_type = if multiple_control_successors { - let u64_ty_id = TypeID::new( - function - .types - .iter() - .position(|ty| *ty == Type::UnsignedInteger64) - .unwrap(), - ); - Type::Product( - output_data_types - .chain(std::iter::once(u64_ty_id)) - .collect(), - ) - } else { - Type::Product(output_data_types.collect()) - }; - - // Assemble the manifest. 
- let mut manifest = PartitionManifest::default(); - manifest.top_node = top_node; - - // The first inputs are the data inputs, from other partitions. - manifest - .inputs - .extend(data_inputs.iter().map(|x| PartitionInput::DataInput(*x))); - - // The next inputs are the function parameters, all in order. - manifest.inputs.extend( - (0..function.function.param_types.len()) - .map(|x| PartitionInput::FunctionArgument(x as u32)), - ); - - // The next inputs are the array constants, all in order. TODO: only - // include constant inputs for constants actually used by this function, - // not all the constants in the module. - manifest.inputs.extend( - (0..(function - .constants - .iter() - .filter(|cons| cons.try_array_type(function.types).is_some()) - .count())) - .map(|x| PartitionInput::ArrayConstant(x as u32)), - ); - - // The last inputs are the dynamic constants, all in order. - manifest.inputs.extend( - (0..function.function.num_dynamic_constants) - .map(|x| PartitionInput::DynamicConstant(x as u32)), - ); - - // The outputs are the data outputs of this partition. - manifest - .outputs - .extend(data_outputs.iter().map(|x| PartitionOutput::DataOutput(*x))); - - // If there are multiple control returns, also output the node being - // returned from. - if multiple_control_successors { - manifest.outputs.push(PartitionOutput::ControlIndicator); - } - - // Store the successor partitions. - manifest.successor_partitions = control_successors - .into_iter() - .map(|(part_id, control_id)| (control_id, part_id)) - .collect(); - - PartitionContext { - function, - partition_id, - top_node, - data_inputs, - data_outputs, - control_returns, - reverse_postorder, - partition_input_types, - return_type, - manifest, - } - } -} - -/* - * Types, constants, and dynamic constants are fairly simple to translate into - * LLVM IR. 
- */ - -pub(crate) fn generate_type_string(ty: &Type, llvm_types: &Vec<String>) -> String { - match ty { - Type::Control(_) => { - // Later, we create virtual registers corresponding to fork nodes of - // type i64, so we need the "type" of the fork node to be i64. - "i64".to_string() - } - Type::Boolean => "i1".to_string(), - Type::Integer8 | Type::UnsignedInteger8 => "i8".to_string(), - Type::Integer16 | Type::UnsignedInteger16 => "i16".to_string(), - Type::Integer32 | Type::UnsignedInteger32 => "i32".to_string(), - Type::Integer64 | Type::UnsignedInteger64 => "i64".to_string(), - Type::Float32 => "float".to_string(), - Type::Float64 => "double".to_string(), - // Because we traverse in bottom-up order, we can assume that the LLVM - // types for children types are already computed. - Type::Product(fields) => { - let mut iter = fields.iter(); - if let Some(first) = iter.next() { - iter.fold("{".to_string() + &llvm_types[first.idx()], |s, f| { - s + ", " + &llvm_types[f.idx()] - }) + "}" - } else { - "{}".to_string() - } - } - Type::Summation(_) => todo!(), - Type::Array(_, _) => { - // Array types becomes pointers. The element type and dynamic - // constant bounds characterize the access code we generate later, - // not the type itself. - "ptr".to_string() - } - } -} - -pub(crate) fn generate_type_strings(module: &Module) -> Vec<String> { - // Render types into LLVM IR. This requires translating from our interning - // structures to LLVM types. We can't just blow through the types vector, - // since a type may reference a type ID ahead of it in the vector. Instead, - // iterate types in a bottom up order with respect to the type intern DAGs. 
- let mut llvm_types = vec!["".to_string(); module.types.len()]; - for id in types_bottom_up(&module.types) { - llvm_types[id.idx()] = generate_type_string(&module.types[id.idx()], &llvm_types); - } - - llvm_types -} - -pub(crate) fn generate_constant_string( - cons_id: ConstantID, - cons: &Constant, - tys: &Vec<Type>, - llvm_constants: &Vec<String>, -) -> String { - match cons { - Constant::Boolean(val) => { - if *val { - "true".to_string() - } else { - "false".to_string() - } - } - Constant::Integer8(val) => format!("{}", val), - Constant::Integer16(val) => format!("{}", val), - Constant::Integer32(val) => format!("{}", val), - Constant::Integer64(val) => format!("{}", val), - Constant::UnsignedInteger8(val) => format!("{}", val), - Constant::UnsignedInteger16(val) => format!("{}", val), - Constant::UnsignedInteger32(val) => format!("{}", val), - Constant::UnsignedInteger64(val) => format!("{}", val), - Constant::Float32(val) => { - if val.fract() == 0.0 { - format!("{}.0", val) - } else { - format!("{}", val) - } - } - Constant::Float64(val) => { - if val.fract() == 0.0 { - format!("{}.0", val) - } else { - format!("{}", val) - } - } - Constant::Product(_, _) | Constant::Summation(_, _, _) | Constant::Array(_, _) => { - format!("%cons.{}", cons_id.idx()) - } - Constant::Zero(ty_id) => match tys[ty_id.idx()] { - Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => { - format!("%cons.{}", cons_id.idx()) - } - _ => "zeroinitializer".to_string(), - }, - } -} - -pub(crate) fn generate_constant_strings(module: &Module) -> Vec<String> { - // Render constants into LLVM IR. This is done in a very similar manner as - // types. 
- let mut llvm_constants = vec!["".to_string(); module.constants.len()]; - for id in constants_bottom_up(&module.constants) { - llvm_constants[id.idx()] = generate_constant_string( - id, - &module.constants[id.idx()], - &module.types, - &llvm_constants, - ); - } - - llvm_constants -} - -pub(crate) fn generate_dynamic_constant_strings(module: &Module) -> Vec<String> { - // Render dynamic constants into LLVM IR. - let mut llvm_dynamic_constants = vec!["".to_string(); module.dynamic_constants.len()]; - for id in (0..module.dynamic_constants.len()).map(DynamicConstantID::new) { - match &module.dynamic_constants[id.idx()] { - DynamicConstant::Constant(val) => llvm_dynamic_constants[id.idx()] = format!("{}", val), - DynamicConstant::Parameter(_) => { - llvm_dynamic_constants[id.idx()] = format!("%dyn_cons.{}", id.idx()) - } - } - } - - llvm_dynamic_constants -} - -/* - * Calculate in-memory size and alignment of a type. The size is optional, since - * array types with dynamic constant dimensions may not have a compile time - * known size. - */ -pub fn type_size_and_alignment(module: &Module, ty: TypeID) -> (Option<usize>, usize) { - match module.types[ty.idx()] { - Type::Control(_) => { - panic!("PANIC: Can't calculate in-memory size and alignment of control type.") - } - Type::Boolean => (Some(1), 1), - Type::Integer8 => (Some(1), 1), - Type::Integer16 => (Some(2), 2), - Type::Integer32 => (Some(4), 4), - Type::Integer64 => (Some(8), 8), - Type::UnsignedInteger8 => (Some(1), 1), - Type::UnsignedInteger16 => (Some(2), 2), - Type::UnsignedInteger32 => (Some(4), 4), - Type::UnsignedInteger64 => (Some(8), 8), - Type::Float32 => (Some(4), 4), - Type::Float64 => (Some(8), 8), - Type::Product(ref fields) => { - let (size, align) = fields - .iter() - .map(|ty| type_size_and_alignment(module, *ty)) - .fold( - (Some(0), 1), - |(acc_size, acc_align), (field_size, field_align)| { - // Alignment of product is maximum alignment of fields. 
- let new_align = std::cmp::max(acc_align, field_align); - if let (Some(acc_size), Some(field_size)) = (acc_size, field_size) { - // Pre-padding is so that the new field has proper - // alignment within the product. - let mut pre_padding = field_align - acc_size % field_align; - if pre_padding == field_align { - pre_padding = 0; - } - (Some(acc_size + pre_padding + field_size), new_align) - } else { - (None, new_align) - } - }, - ); - - if let Some(size) = size { - // Post-padding is so that the overall in-memory size has the - // right alignment in an array, and is only done at the end. - let mut post_padding = align - size % align; - if post_padding == align { - post_padding = 0; - } - (Some(size + post_padding), align) - } else { - (None, align) - } - } - Type::Summation(_) => todo!(), - Type::Array(elem, ref dims) => { - let (maybe_elem_size, elem_align) = type_size_and_alignment(module, elem); - - // We can only calculate the number of elements at compile time if - // every dynamic constant dimension is a compile-time constant. - let maybe_num_elems = dims.iter().fold(Some(1), |acc, dim| { - Some(acc? * module.dynamic_constants[dim.idx()].try_constant()?) - }); - - // Even if the number of elements is compile-time known, the element - // type may have unknown compile-time size. - if let (Some(elem_size), Some(num_elems)) = (maybe_elem_size, maybe_num_elems) { - (Some(elem_size * num_elems), elem_align) - } else { - (None, elem_align) - } - } - } -} - -/* - * Calculate in-memory bytes representing constant. Return the in-memory bytes - * and the alignment of the constant, if it's non-zero. If it's zero, optionally - * return the size of the constant, and its alignment. TODO: decide on how to - * represent memory layouts at the compiler level. - */ -pub fn embed_constant(module: &Module, cons: ConstantID) -> ConstantBytes { - let unchecked = match module.constants[cons.idx()] { - // Handle zero constant scalars below. 
- Constant::Boolean(v) => ConstantBytes::NonZero(vec![v as u8], 1), - Constant::Integer8(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 1), - Constant::Integer16(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 2), - Constant::Integer32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4), - Constant::Integer64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8), - Constant::UnsignedInteger8(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 1), - Constant::UnsignedInteger16(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 2), - Constant::UnsignedInteger32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4), - Constant::UnsignedInteger64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8), - Constant::Float32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4), - Constant::Float64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8), - Constant::Product(ty, ref fields) => { - let field_bytes: Vec<ConstantBytes> = fields - .iter() - .map(|field| embed_constant(module, *field)) - .collect(); - - if field_bytes.iter().all(|cb| { - if let ConstantBytes::Zero(_, _) = cb { - true - } else { - false - } - }) { - // If all of the fields are zero constants, then this is a zero - // constant. - let (size, align) = type_size_and_alignment(module, ty); - ConstantBytes::Zero(size, align) - } else { - // We only construct the in-memory bytes if there is a non-zero - // bytes somewhere. - let (mut bytes, align) = field_bytes.into_iter().fold( - (vec![], 0), - |(mut acc_bytes, acc_align), field| { - // Alignment of product is maximum alignment of fields. - let new_align = std::cmp::max(acc_align, field.align()); - - // Pre-padding is so that the new field has proper - // alignment within the product. 
- while acc_bytes.len() % field.align() != 0 { - acc_bytes.push(0); - } - match field { - ConstantBytes::NonZero(bytes, _) => acc_bytes.extend(&bytes), - ConstantBytes::Zero(size, _) => acc_bytes.extend(repeat(0).take(size.expect("PANIC: Attempted to embed a zero constant with unknown size into a non-zero constant product. Non-zero constants must have compile-time known size. This is probably because an array field is a zero constant with non-constant dynamic constant dimensions."))), - } - (acc_bytes, new_align) - }, - ); - - // Post-padding is so that the overall in-memory vector has the - // right size, and is only done at the end. - while bytes.len() % align != 0 { - bytes.push(0); - } - ConstantBytes::NonZero(bytes, align) - } - } - Constant::Summation(_, _, _) => todo!(), - Constant::Array(ty, ref elements) => { - let element_bytes: Vec<ConstantBytes> = elements - .iter() - .map(|element| embed_constant(module, *element)) - .collect(); - - let (size, align) = type_size_and_alignment(module, ty); - if element_bytes.iter().all(|cb| { - if let ConstantBytes::Zero(_, _) = cb { - true - } else { - false - } - }) { - // If all of the fields are zero constants, then this is a zero - // constant. - ConstantBytes::Zero(size, align) - } else { - let array_bytes: Vec<u8> = element_bytes - .into_iter() - .map(|cb| match cb { - ConstantBytes::NonZero(bytes, _) => bytes, - ConstantBytes::Zero(size, _) => vec![0; size.expect("PANIC: Attempted to embed a zero constant with unknown size into a non-zero constant array. Non-zero constants must have compile-time known size. This is probably because an array element is a zero constant with non-constant dynamic constant dimensions.")], - }) - .flatten() - .collect(); - assert_eq!(array_bytes.len(), size.expect("PANIC: Size of a non-zero constant array is unknown at compile time. 
All non-zero constants must have compile time known size."), "PANIC: Size of array type calculated by type_size_and_alignment differs from calculated in-memory byte representation's size."); - ConstantBytes::NonZero(array_bytes, align) - } - } - Constant::Zero(ty) => { - let (size, align) = type_size_and_alignment(module, ty); - ConstantBytes::Zero(size, align) - } - }; - - // Catch all code for making zero constant scalars actually - // ConstantBytes::Zero variants. - if let ConstantBytes::NonZero(bytes, align) = &unchecked { - if module.constants[cons.idx()].is_strictly_scalar() && bytes.iter().all(|x| *x == 0) { - return ConstantBytes::Zero(Some(bytes.len()), *align); - } - } - unchecked -} diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs deleted file mode 100644 index b806bd502ae84e073b9ca54837e559d6e920b00f..0000000000000000000000000000000000000000 --- a/hercules_cg/src/cpu.rs +++ /dev/null @@ -1,1019 +0,0 @@ -extern crate bitvec; -extern crate hercules_ir; - -use std::collections::HashMap; -use std::collections::VecDeque; - -use std::iter::zip; - -use std::fmt::Write; - -use self::bitvec::prelude::*; - -use self::hercules_ir::*; - -use crate::*; - -/* - * When assembling LLVM basic blocks, we traverse the nodes in a partition in an - * ad-hoc order. Thus, we cannot assume block terminators will be visited after - * data nodes, for example. However, textual LLVM IR requires that the - * terminator instruction is last. So, we emit nodes into separate strings of - * LLVM IR that will get stichted together when the block is complete. - */ -#[derive(Debug)] -struct LLVMBlock { - header: String, - phis: String, - data: String, - terminator: String, -} - -impl<'a> FunctionContext<'a> { - /* - * Top level function to generate code for a partition, targeting the CPU. 
- */ - pub(crate) fn codegen_cpu_partition<W: Write>( - &self, - top_node: NodeID, - w: &mut W, - ) -> Result<PartitionManifest, std::fmt::Error> { - // Step 1: do some analysis to get a bunch of per-partition information. - let partition_id = self.plan.partitions[top_node.idx()]; - let partition_context = PartitionContext::new(self, partition_id, top_node); - - // Step 2: emit the function signature. The partition function - // parameters are the function parameters, the partition data inputs, - // the array constant pointers, and the dynamic constants. - let mut partition_function_parameters = partition_context - // The data inputs to this partition. These are the data values - // calculated in a different partition in the same function. - .partition_input_types - .iter() - .enumerate() - .map(|(idx, ty_id)| { - ( - self.llvm_types[ty_id.idx()].clone(), - format!("%part_arg.{}", idx), - ) - }) - // The input types of the overall function. - .chain( - self.function - .param_types - .iter() - .enumerate() - .map(|(idx, ty_id)| { - ( - self.llvm_types[ty_id.idx()].clone(), - format!("%func_arg.{}", idx), - ) - }), - ) - // Array constants are passed in, pre-initialized. - .chain( - self.partition_array_constant_inputs() - .into_iter() - .map(|(id, ty_id)| { - ( - self.llvm_types[ty_id.idx()].clone(), - format!("%cons.{}", id.idx()), - ) - }), - ) - // Dynamic constants are passed in, since they are only known right - // before runtime. 
- .chain( - self.partition_dynamic_constant_inputs() - .into_iter() - .map(|id| ("i64".to_string(), format!("%dyn_cons.{}", id.idx()))), - ); - - write!( - w, - "define {} @{}_part_{}(", - generate_type_string(&partition_context.return_type, &self.llvm_types), - self.function.name, - partition_id.idx(), - )?; - let (first_ty, first_param) = partition_function_parameters.next().unwrap(); - write!(w, "{} {}", first_ty, first_param)?; - for (ty, param) in partition_function_parameters { - write!(w, ", {} {}", ty, param)?; - } - write!(w, ") {{\n")?; - - // Step 3: set up basic blocks. A node represents a basic block if its - // entry in the basic blocks vector points to itself. - let mut llvm_bbs = HashMap::new(); - for id in &self.partitions_inverted_map[partition_id.idx()] { - if self.bbs[id.idx()] == *id { - llvm_bbs.insert( - id, - LLVMBlock { - header: format!("bb_{}:\n", id.idx()), - phis: "".to_string(), - data: "".to_string(), - terminator: "".to_string(), - }, - ); - } - } - - // Step 4: emit nodes. Nodes are emitted into basic blocks separately as - // nodes are not necessarily emitted in order. Assemble worklist of - // nodes, starting as reverse post order of nodes. For non-phi and non- - // reduce nodes, only emit once all data uses are emitted. In addition, - // consider additional anti-dependence edges from read to write nodes. - let mut visited = bitvec![u8, Lsb0; 0; self.function.nodes.len()]; - let mut worklist = VecDeque::from(partition_context.reverse_postorder.clone()); - while let Some(id) = worklist.pop_front() { - if !(self.function.nodes[id.idx()].is_phi() - || self.function.nodes[id.idx()].is_reduce()) - && !get_uses(&self.function.nodes[id.idx()]) - .as_ref() - .into_iter() - // If this node isn't a phi or reduce, we need to check that - // all uses, as well as all reads we anti-depend with, have - // been emitted. 
- .chain(self.antideps.iter().filter_map(|(read, write)| { - if id == *write { - Some(read) - } else { - None - } - })) - // Only data dependencies inside this partition need to have - // already been visited. - .all(|id| { - self.plan.partitions[id.idx()] != partition_id - || self.function.nodes[id.idx()].is_control() - || visited[id.idx()] - }) - { - // Skip emitting node if it's not a phi or reduce node and if - // its data uses are not emitted yet. - worklist.push_back(id); - } else { - // Once all of the data dependencies for this node are emitted, - // this node can be emitted. For reduce nodes specifically, we - // want to emit the phi in the fork's basic block, not the - // join's, so we handle that ugly case here. This is because - // there is a fundamental mismatch between Hercules' notion of - // reductions and LLVM's phi nodes. This is ok, since we can - // translate between the two. It's just a pain. - let bb = if let Node::Reduce { - control, - init: _, - reduct: _, - } = self.function.nodes[id.idx()] - { - // Figure out the fork corresponding to the associated join. - let fork_id = if let Node::Join { control } = self.function.nodes[control.idx()] - { - if let Type::Control(factors) = - &self.types[self.typing[control.idx()].idx()] - { - *factors.last().unwrap() - } else { - panic!("PANIC: Type of join node associated with reduce node is not a control type.") - } - } else { - panic!("PANIC: Node associated with reduce node isn't a join node.") - }; - - // Emit in the basic block of the fork. - llvm_bbs.get_mut(&self.bbs[fork_id.idx()]).unwrap() - } else { - // In the normal case, emit in the basic block the node has - // been actually assigned to. - llvm_bbs.get_mut(&self.bbs[id.idx()]).unwrap() - }; - partition_context.codegen_cpu_node(id, bb)?; - visited.set(id.idx(), true); - } - } - - // Step 5: emit the now completed basic blocks, in order. Emit a dummy - // header block to unconditionally jump to the "top" basic block. 
Also - // emit allocas for compile-time known sized constants. TODO: only emit - // used constants, not all the constants in the module. TODO: emit sum - // constants. - write!(w, "bb_header:\n")?; - for cons_id in (0..self.constants.len()).map(ConstantID::new) { - if let Some(ty_id) = self.constants[cons_id.idx()].try_product_type(&self.types) { - if let (Some(size), align) = self.type_sizes_aligns[ty_id.idx()] { - write!( - w, - " %cons.{} = alloca i8, i32 {}, align {}\n", - cons_id.idx(), - size, - align - )?; - } - } - } - write!(w, " br label %bb_{}\n", top_node.idx())?; - for id in partition_context.reverse_postorder { - if self.bbs[id.idx()] == id { - write!( - w, - "{}{}{}{}", - llvm_bbs[&id].header, - llvm_bbs[&id].phis, - llvm_bbs[&id].data, - llvm_bbs[&id].terminator - )?; - } - } - - // Step 6: close the partition function - we're done. The partition - // manifest is created by the partition context. - write!(w, "}}\n\n")?; - Ok(partition_context.manifest) - } -} - -impl<'a> PartitionContext<'a> { - /* - * Emit LLVM IR implementing a single node. - */ - fn codegen_cpu_node(&self, id: NodeID, bb: &mut LLVMBlock) -> std::fmt::Result { - // Helper to emit code to index a collection. All collections are - // pointers to some memory at the LLVM IR level. This memory is passed - // in as a parameter for anything involving arrays, and is alloca-ed for - // product and summation types. - // TODO: actually do this ^ for products. Right now, products are still - // done at the LLVM struct level w/ GEP and so on. Apologies for anyone - // else reading this comment. - let mut generate_index_code = |collect: NodeID, indices: &[Index]| -> std::fmt::Result { - // Step 1: calculate the list of collection types corresponding to - // each index. 
- let mut collection_ty_ids = vec![]; - let mut curr_ty_id = self.function.typing[collect.idx()]; - for index in indices { - match (index, &self.function.types[curr_ty_id.idx()]) { - (Index::Field(idx), Type::Product(ty_ids)) - | (Index::Variant(idx), Type::Summation(ty_ids)) => { - collection_ty_ids.push(curr_ty_id); - curr_ty_id = ty_ids[*idx]; - } - (Index::Position(_), Type::Array(elem_ty_id, _)) => { - collection_ty_ids.push(curr_ty_id); - curr_ty_id = *elem_ty_id; - } - _ => { - panic!("PANIC: Found unsupported combination of index and collection type.") - } - } - } - assert!( - self.function.types[curr_ty_id.idx()].is_primitive(), - "PANIC: Cannot generate partial indexing code." - ); - - // Step 2: calculate, as LLVM IR values, the stride and offset - // needed at each level of the collection. For products, the stride - // is calculated using a getelementptr hack (and is the size of the - // struct), and the offset corresponds to the field index (which is - // translated to an offset using another getelementptr hack). For - // arrays, the stride is the dynamic constant extent multiplied by - // the stride of the element type, and the offset is the position - // index multiplied by the stride of the element type. Additionally, - // emit code to add up all of the offsets to get a total offset into - // the collection. TODO: to support summations, and arrays in - // arbitrary places, we need to not use the hacky getelementptr - // technique, since LLVM IR can't represent arrays (in the Hercules - // sense) or summations as primitive types. Instead, we need to do - // collection memory layout entirely ourselves. 
- let elem_llvm_ty = &self.function.llvm_types[curr_ty_id.idx()]; - write!(bb.data, " %index{}.{}.total_offset = add i64 0, 0\n %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n", - id.idx(), indices.len(), id.idx(), indices.len(), elem_llvm_ty, id.idx(), indices.len(), id.idx(), indices.len() - )?; - for (idx, index) in indices.into_iter().enumerate().rev() { - match index { - Index::Field(field) => { - let product_llvm_ty = - &self.function.llvm_types[collection_ty_ids[idx].idx()]; - write!( - bb.data, - " %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n %index{}.{}.offset.ptrhack = getelementptr {}, ptr null, i64 0, i32 {}\n %index{}.{}.offset = ptrtoint ptr %index{}.{}.offset.ptrhack to i64\n", - id.idx(), idx, - product_llvm_ty, - id.idx(), idx, - id.idx(), idx, - id.idx(), idx, - product_llvm_ty, - field, - id.idx(), idx, - id.idx(), idx, - )?; - } - Index::Variant(_) => todo!(), - Index::Position(position) => { - let array_extents = self.function.types[collection_ty_ids[idx].idx()] - .try_extents() - .unwrap(); - - // TODO: calculate stride for arrays, needed for arrays - // nested in other collections. 
- write!(bb.data, " %index{}.{}.offset.add.0 = add ", id.idx(), idx)?; - self.cpu_emit_use_of_node(position[0], Some(id), true, &mut bb.data)?; - write!(bb.data, ", {}\n", 0)?; - for (dim_idx, (extent_dc_id, position_id)) in - zip(array_extents, position.into_iter()).enumerate().skip(1) - { - write!( - bb.data, - " %index{}.{}.offset.mul.{} = mul i64 {}, %index{}.{}.offset.add.{}\n", - id.idx(), idx, - dim_idx, - self.function.llvm_dynamic_constants[extent_dc_id.idx()], - id.idx(), idx, - dim_idx - 1 - )?; - write!( - bb.data, - " %index{}.{}.offset.add.{} = add ", - id.idx(), - idx, - dim_idx - )?; - self.cpu_emit_use_of_node(*position_id, Some(id), true, &mut bb.data)?; - write!( - bb.data, - ", %index{}.{}.offset.mul.{}\n", - id.idx(), - idx, - dim_idx - )?; - } - write!(bb.data, " %index{}.{}.offset = mul i64 %index{}.{}.stride, %index{}.{}.offset.add.{}\n", id.idx(), idx, id.idx(), idx + 1, id.idx(), idx, position.len() - 1)?; - } - Index::Control(_) => panic!( - "PANIC: Found control index when generating collection indexing code." - ), - } - write!( - bb.data, - " %index{}.{}.total_offset = add i64 %index{}.{}.total_offset, %index{}.{}.offset\n", - id.idx(), idx, - id.idx(), idx + 1, - id.idx(), idx - )?; - } - - // Step 3: emit the getelementptr using the total collection offset. - write!(bb.data, " %index{} = getelementptr i8, ptr ", id.idx(),)?; - self.cpu_emit_use_of_node(collect, Some(id), false, &mut bb.data)?; - write!(bb.data, ", i64 %index{}.0.total_offset\n", id.idx())?; - - Ok(()) - }; - - // Helper to find the basic block corresponding to a particular control - // predecessor, for phi nodes. This is needed for when a predecessor - // basic block is in a different partition. In this case, the phi's - // control predecessor is set to the top block of the partition. 
- let get_phi_predecessor = |pred_id: NodeID| { - if self.function.plan.partitions[pred_id.idx()] == self.partition_id { - format!("{}", self.function.bbs[pred_id.idx()].idx()) - } else { - format!("header") - } - }; - - // Emit the primary IR for each node. - match self.function.function.nodes[id.idx()] { - Node::Start | Node::Region { preds: _ } => { - // Basic blocks containing a start or region node branch - // unconditionally to their single successor. - let successor = self - .function - .def_use - .get_users(id) - .iter() - .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control()) - .next() - .unwrap(); - bb.terminator = format!(" br label %bb_{}\n", successor.idx()); - } - Node::If { control: _, cond } => { - let successors = self.function.def_use.get_users(id); - - // Determine the order of the successors (true/false or false/ - // true) in the successors slice. - let rev = if let Node::Read { - collect: _, - indices, - } = &self.function.function.nodes[successors[0].idx()] - { - indices[0] != Index::Control(0) - } else { - panic!("PANIC: Successor of if node isn't a read node.") - }; - bb.terminator = " br ".to_string(); - self.cpu_emit_use_of_node(cond, Some(id), true, &mut bb.terminator)?; - write!( - bb.terminator, - ", label %bb_{}, label %bb_{}\n", - successors[(!rev) as usize].idx(), - successors[rev as usize].idx() - )?; - } - Node::Fork { control, factor: _ } => { - // Calculate the join and successor. - let join = self.function.fork_join_map[&id]; - let successor = self - .function - .def_use - .get_users(id) - .iter() - .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control()) - .next() - .unwrap(); - - // Create the phi node for the loop index. This is used directly - // by any thread ID user nodes. The control predecessor basic - // blocks are the control node preceding the fork and the - // corresponding join. 
- write!(bb.phis, " ")?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?; - write!( - bb.phis, - " = phi i64 [ 0, %bb_{} ], [ %fork_inc{}, %bb_{} ]\n", - get_phi_predecessor(self.function.bbs[control.idx()]), - id.idx(), - get_phi_predecessor(self.function.bbs[join.idx()]), - )?; - - // Increment the loop index by one each iteration. - write!(bb.data, " %fork_inc{} = add i64 1, ", id.idx())?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?; - write!(bb.data, "\n")?; - - // Branch to the successor basic block. - write!( - bb.terminator, - " br label %bb_{}\n", - self.function.bbs[successor.idx()].idx() - )?; - } - Node::Join { control } => { - // Get the fork, it's factor, and the successor to this join. - let fork_id = if let Type::Control(factors) = - &self.function.types[self.function.typing[control.idx()].idx()] - { - *factors.last().unwrap() - } else { - panic!("PANIC: The type of a join node is incorrect.") - }; - let factor = if let Node::Fork { control: _, factor } = - &self.function.function.nodes[fork_id.idx()] - { - *factor - } else { - panic!("PANIC: The node referenced by the control type of a join node is not a fork.") - }; - let successor = self - .function - .def_use - .get_users(id) - .iter() - .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control()) - .next() - .unwrap(); - - // Form the bottom of the loop. Check if the loop is finished, - // and branch between the successor and the fork. The structure - // of this loop implies that fork-joins have to iterate at least - // once. Change the loop termination branch target if this is a - // control return (see comment below for more details). 
- let is_control_return = self.control_returns.contains(&id); - write!( - bb.terminator, - " %join_cond{} = icmp ult i64 %fork_inc{}, {}\n", - id.idx(), - fork_id.idx(), - self.function.llvm_dynamic_constants[factor.idx()] - )?; - write!( - bb.terminator, - " br i1 %join_cond{}, label %bb_{}, label %bb_{}\n", - id.idx(), - self.function.bbs[fork_id.idx()].idx(), - if is_control_return { - format!("{}_join_cr", id.idx()) - } else { - format!("{}", self.function.bbs[successor.idx()].idx()) - } - )?; - - // Join nodes are the only node that can be a control return - // from a partition and generate a conditional branch. This - // means we have to do this really ugly hack where we insert - // another basic block to be the control return that we - // conditionally branch to. Other control nodes that may be - // control returns don't have this problem, because they always - // unconditionally branch to their destination. We add this LLVM - // IR text of a new basic block in the terminator of the current - // basic block, since we don't have mutable access here to the - // set of all LLVM basic blocks. - if is_control_return { - write!(bb.terminator, "bb_{}_join_cr:\n", id.idx())?; - } - } - Node::Phi { - control: _, - ref data, - } => { - // For each predecessor of the associated region, we determine - // if that predecessor is in this partition or not. If so, then - // the predecessor control is just the basic block of the - // predecessor control node. If not, the predecessor control is - // the first basic block of the partition. The corresponding - // datum also needs to be provided by argument to the partition, - // and this is handled by cpu_emit_use_of_node. 
- let pred_ids = - get_uses(&self.function.function.nodes[self.function.bbs[id.idx()].idx()]); - let mut control_datum_pairs = zip(data.into_iter(), pred_ids.as_ref().iter()) - .map(|(datum, pred_id)| (*datum, get_phi_predecessor(*pred_id))); - - // TODO: this code burns my eyes to look at, it might be worth - // making this not carcinogenic. - write!(bb.phis, " ")?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?; - write!( - bb.phis, - " = phi {} [ ", - self.function.llvm_types[self.function.typing[id.idx()].idx()] - )?; - let (first_data, first_control) = control_datum_pairs.next().unwrap(); - self.cpu_emit_use_of_node(first_data, Some(id), false, &mut bb.phis)?; - write!(bb.phis, ", %bb_{} ]", first_control)?; - for (data, control) in control_datum_pairs { - write!(bb.phis, ", [ ")?; - self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.phis)?; - write!(bb.phis, ", %bb_{} ]", control)?; - } - write!(bb.phis, "\n")?; - } - Node::ThreadID { control } => { - // Just bitcast the loop index from the fork. The bitcast is a - // no-op, but we add it to copy the value from the virtual - // register the fork generates to the virtual register - // corresponding to this thread ID node. - assert!(self.function.function.nodes[control.idx()].is_fork()); - write!(bb.data, " ")?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?; - write!(bb.data, " = bitcast i64 ",)?; - self.cpu_emit_use_of_node(control, Some(id), false, &mut bb.data)?; - write!(bb.data, " to i64\n",)?; - } - Node::Reduce { - control, - init, - reduct, - } => { - // Figure out the fork corresponding to the associated join. 
- let fork_id = if let Node::Join { control } = - self.function.function.nodes[control.idx()] - { - if let Type::Control(factors) = - &self.function.types[self.function.typing[control.idx()].idx()] - { - *factors.last().unwrap() - } else { - panic!("PANIC: Type of join node associated with reduce node is not a control type.") - } - } else { - panic!("PANIC: Node associated with reduce node isn't a join node.") - }; - - // Figure out the fork's predecessor. - let pred = if let Node::Fork { control, factor: _ } = - self.function.function.nodes[fork_id.idx()] - { - control - } else { - panic!("PANIC: Node referenced in type of join node associated with a reduce node is not a fork node.") - }; - - // Reduce nodes just lower to phi nodes. We already did the ugly - // hack so that "bb" refers to the basic block of the fork, - // rather than the join. So, now we just need to emit the phi. - write!(bb.phis, " ")?; - self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.phis)?; - write!( - bb.phis, - " = phi {} [ ", - self.function.llvm_types[self.function.typing[id.idx()].idx()] - )?; - self.cpu_emit_use_of_node(init, Some(id), false, &mut bb.phis)?; - write!( - bb.phis, - ", %bb_{} ], [ ", - get_phi_predecessor(self.function.bbs[pred.idx()]) - )?; - self.cpu_emit_use_of_node(reduct, Some(id), false, &mut bb.phis)?; - write!( - bb.phis, - ", %bb_{} ]\n", - get_phi_predecessor(self.function.bbs[control.idx()]) - )?; - } - // These nodes are handled by other mechanisms in the code lowering - // process. 
- Node::Return { - control: _, - data: _, - } - | Node::Parameter { index: _ } - | Node::Constant { id: _ } - | Node::DynamicConstant { id: _ } => {} - Node::Binary { left, right, op } => { - let op = match op { - BinaryOperator::Add => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fadd" - } else { - "add" - } - } - BinaryOperator::Sub => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fsub" - } else { - "sub" - } - } - BinaryOperator::Mul => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fmul" - } else { - "mul" - } - } - BinaryOperator::Div => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fdiv" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "udiv" - } else { - "sdiv" - } - } - BinaryOperator::Rem => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "frem" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "urem" - } else { - "srem" - } - } - BinaryOperator::LT => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fcmp olt" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "icmp ult" - } else { - "icmp slt" - } - } - BinaryOperator::LTE => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fcmp ole" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "icmp ule" - } else { - "icmp sle" - } - } - BinaryOperator::GT => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fcmp ogt" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "icmp ugt" - } else { - "icmp sgt" - } - } - BinaryOperator::GTE => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - 
"fcmp oge" - } else if self.function.types[self.function.typing[left.idx()].idx()] - .is_unsigned() - { - "icmp uge" - } else { - "icmp sge" - } - } - BinaryOperator::EQ => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fcmp oeq" - } else { - "icmp eq" - } - } - BinaryOperator::NE => { - if self.function.types[self.function.typing[left.idx()].idx()].is_float() { - "fcmp one" - } else { - "icmp ne" - } - } - BinaryOperator::Or => "or", - BinaryOperator::And => "and", - BinaryOperator::Xor => "xor", - BinaryOperator::LSh => "lsh", - BinaryOperator::RSh => { - if self.function.types[self.function.typing[left.idx()].idx()].is_unsigned() - { - "lshr" - } else { - "ashr" - } - } - }; - write!(bb.data, " ")?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?; - write!(bb.data, " = {} ", op)?; - self.cpu_emit_use_of_node(left, Some(id), true, &mut bb.data)?; - write!(bb.data, ", ")?; - self.cpu_emit_use_of_node(right, Some(id), false, &mut bb.data)?; - write!(bb.data, "\n")?; - } - Node::Read { - collect, - ref indices, - } => { - if self.function.function.nodes[collect.idx()].is_strictly_control() { - // Read nodes may be projection succesors of if or match - // nodes. 
- let successor = self.function.def_use.get_users(id)[0]; - write!( - bb.terminator, - " br label %bb_{}\n", - self.function.bbs[successor.idx()].idx() - )?; - } else { - generate_index_code(collect, indices)?; - write!(bb.data, " ")?; - self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.data)?; - write!( - bb.data, - " = load {}, ptr %index{}\n", - self.function.llvm_types[self.function.typing[id.idx()].idx()], - id.idx(), - )?; - } - } - Node::Write { - collect, - data, - ref indices, - } => { - generate_index_code(collect, indices)?; - write!( - bb.data, - " store {} ", - self.function.llvm_types[self.function.typing[data.idx()].idx()] - )?; - self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.data)?; - write!(bb.data, ", ptr %index{}\n", id.idx())?; - - // We can't just "copy" in LLVM IR, but we want to forward the - // pointer, unchanged, as the "output" of this write node. The - // easiest way to do this is to insert a useless bitcast. - write!(bb.data, " ")?; - self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?; - write!(bb.data, " = bitcast ptr ")?; - self.cpu_emit_use_of_node(collect, Some(id), false, &mut bb.data)?; - write!(bb.data, " to ptr\n")?; - } - _ => { - eprintln!("TO LOWER: {:?}", self.function.function.nodes[id.idx()]); - } - } - - // If this node is a control return, we emit a return from this - // partition function. - if self.control_returns.contains(&id) { - // Get rid of the old terminator, replace with return. Don't do this - // if this node is a join node, since in that specific case we - // generate specific control return logic. See the join node codegen - // above for more details. - if !self.function.function.nodes[id.idx()].is_join() { - bb.terminator.clear(); - } - - // Making structs from the aggregated values in LLVM IR is a pain. - // We need to, one-by-one, insertvalue each element into the struct. 
- let ret_ty_str = generate_type_string(&self.return_type, &self.function.llvm_types); - for (idx, data_output_id) in self.data_outputs.iter().enumerate() { - write!( - bb.terminator, - " %ret_agg{}.{} = insertvalue {} {}, ", - id.idx(), - idx, - ret_ty_str, - if idx == 0 { - "undef".to_string() - } else { - format!("%ret_agg{}.{}", id.idx(), idx - 1) - } - )?; - let mut data_output_id = *data_output_id; - - // Handle reduce specially here. Technically, the "user" here is - // the join node, so cpu_emit_use_of_node would normally emit - // the reduce node's virtual register directly. However, if a - // data output is the result of a reduce node, that is - // definitely outside for the corresponding fork-join. Thus, we - // actually need to use the reduction use of the reduce node. - // This all only applies if the reduce node is in the current - // partition. If not, then use the reduce node as the argument - // to cpu_emit_use_of_node as normal, so that the partition - // function argument is properly used. - while let Node::Reduce { - control: _, - init: _, - reduct, - } = self.function.function.nodes[data_output_id.idx()] - && self.partition_id == self.function.plan.partitions[data_output_id.idx()] - { - data_output_id = reduct; - } - self.cpu_emit_use_of_node(data_output_id, None, true, &mut bb.terminator)?; - write!(bb.terminator, ", {}\n", idx)?; - } - - // Now, we can return the aggregate value we calculated. - if self.data_outputs.is_empty() && self.control_returns.len() == 1 { - // If there are no data outputs, just return the empty struct. - write!(bb.terminator, " ret {} zeroinitializer\n", ret_ty_str)?; - } else if self.data_outputs.is_empty() { - // If there are multiple control returns, we need to return the - // node ID of the control return, so that the runtime can do - // control flow between partitions. In this case, there aren't - // any data outputs that also need to be returned. 
- write!(bb.terminator, " %ret_agg{}.ctrl_pos = insertvalue {} undef, i64 {}, 0\n ret {} %ret_agg{}.ctrl_pos\n", - id.idx(), - ret_ty_str, - id.idx(), - ret_ty_str, - id.idx() - )?; - } else if self.control_returns.len() == 1 { - // In the normal case, we return the struct containing just the - // data outputs. - write!( - bb.terminator, - " ret {} %ret_agg{}.{}\n", - ret_ty_str, - id.idx(), - self.data_outputs.len() - 1, - )?; - } else { - // If there are multiple control returns from this partition and - // there are data outputs, we add the control return node ID to - // the return aggregate. - write!( - bb.terminator, - " %ret_agg{}.ctrl_pos = insertvalue {} %ret_agg{}.{}, i64 {}, {}\n ret {} %ret_agg{}.ctrl_pos\n", - id.idx(), - ret_ty_str, - id.idx(), - self.data_outputs.len() - 1, - id.idx(), - self.data_outputs.len(), - ret_ty_str, - id.idx(), - )?; - } - } - - Ok(()) - } - - /* - * Emit the LLVM value corresponding to a node. Optionally prefix with the - * LLVM type, which is required by textual LLVM IR in a few places. - * Optionally provide the node that will be using this emission. This is - * unused by all emitted node values except reduce nodes, which require the - * user argument to be given. We chose this interface because at the - * callsite of a cpu_emit_use_of_node, it is always known whether this thing - * being emitted could (or should) possibly be a reduce node. If not, then - * providing none gives a nice early panic when it is a reduce node, either - * because the developer misjudged or because there is a bug. - */ - fn cpu_emit_use_of_node<W: Write>( - &self, - id: NodeID, - user: Option<NodeID>, - emit_type: bool, - w: &mut W, - ) -> std::fmt::Result { - // First, emit the type before the value (if applicable). - if emit_type { - write!( - w, - "{} ", - self.function.llvm_types[self.function.typing[id.idx()].idx()] - )?; - } - - // Emitting the value can be surprisingly complicated, depending on what - // the node is. 
For example, partition arguments are emitted specially. - if let Some(input_idx) = self.data_inputs.iter().position(|inp_id| *inp_id == id) { - // If a use is in another partition, it needs to get passed to this - // partition's function as a parameter. - write!(w, "%part_arg.{}", input_idx)?; - } else { - match self.function.function.nodes[id.idx()] { - // Parameter nodes in this partition also represent parameters - // to this partition function. - Node::Parameter { index } => write!(w, "%func_arg.{}", index)?, - // Constants are pre-defined. - Node::Constant { id } => write!(w, "{}", self.function.llvm_constants[id.idx()])?, - Node::DynamicConstant { id } => { - write!(w, "{}", self.function.llvm_dynamic_constants[id.idx()])? - } - // Reduce nodes, as usual, are not nice to handle. We need to - // emit different LLVM depending on whether the user is inside - // or outside the reduce's corresponding fork-join nest. Inside, - // we emit as usual, since the user needs to use the phi node - // inside the reduction loop. Outside, we need to use the reduct - // use of the reduce node, so that we don't grab the reduction - // variable one loop iteration too early. - Node::Reduce { - control, - init: _, - reduct, - } => { - // Figure out the fork corresponding to the associated join. - let fork_id = if let Node::Join { control } = - self.function.function.nodes[control.idx()] - { - if let Type::Control(factors) = - &self.function.types[self.function.typing[control.idx()].idx()] - { - *factors.last().unwrap() - } else { - panic!() - } - } else { - panic!() - }; - - // Check if the basic block containing the user node is in - // the fork-join nest for this reduce node. We make the user - // node an optional argument as a debugging tool - if we - // exercise this code branch when generating the code for a - // node that absolutely should not be using the result of a - // reduce node, we would like to know! 
- if self.function.fork_join_nest[&self.function.bbs[user.expect("PANIC: cpu_emit_use_of_node was called on a reduce node, but no user node ID was given.").idx()]] - .contains(&fork_id) - { - // If the user is inside the fork-join nest, then emit - // the reduce node directly. - assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]); - write!(w, "%virt.{}", id.idx())?; - } else { - // If the user is outside the fork-join nest, then - // recursively emit on the reduction input to the reduce - // node. This is needed when there is a reduce chain. - assert_eq!( - self.partition_id, - self.function.plan.partitions[reduct.idx()] - ); - self.cpu_emit_use_of_node(reduct, user, emit_type, w)?; - } - } - // Uses that are in this partition are just virtual registers. - // Clang is really annoying about numbering virtual registers, - // so to avoid that silliness we prepend all our virtual - // registers with a prefix indicating what kind of thing it is. - // For normal values, we use "virt" for virtual register. - _ => { - assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]); - write!(w, "%virt.{}", id.idx())?; - } - } - } - - Ok(()) - } -} diff --git a/hercules_cg/src/lib.rs b/hercules_cg/src/lib.rs deleted file mode 100644 index c33fd857ca71b4c82135d7ce933ea441d3ece4f4..0000000000000000000000000000000000000000 --- a/hercules_cg/src/lib.rs +++ /dev/null @@ -1,9 +0,0 @@ -#![feature(let_chains)] - -pub mod common; -pub mod cpu; -pub mod top; - -pub use crate::common::*; -pub use crate::cpu::*; -pub use crate::top::*; diff --git a/hercules_cg/src/top.rs b/hercules_cg/src/top.rs deleted file mode 100644 index 2da69355803a695e2ef2266e51b00ce3f15ac0f9..0000000000000000000000000000000000000000 --- a/hercules_cg/src/top.rs +++ /dev/null @@ -1,204 +0,0 @@ -extern crate hercules_ir; - -use std::collections::HashMap; -use std::fmt::Write; - -use self::hercules_ir::*; - -use crate::*; - -/* - * Top level function to generate code for a module. 
Emits LLVM IR text. Calls - * out to backends to generate code for individual partitions. Creates a - * manifest describing the generated code. - */ -pub fn codegen<W: Write>( - module: &Module, - def_uses: &Vec<ImmutableDefUseMap>, - reverse_postorders: &Vec<Vec<NodeID>>, - typing: &ModuleTyping, - control_subgraphs: &Vec<Subgraph>, - fork_join_maps: &Vec<HashMap<NodeID, NodeID>>, - fork_join_nests: &Vec<HashMap<NodeID, Vec<NodeID>>>, - antideps: &Vec<Vec<(NodeID, NodeID)>>, - bbs: &Vec<Vec<NodeID>>, - plans: &Vec<Plan>, - w: &mut W, -) -> Result<ModuleManifest, std::fmt::Error> { - // Render types, constants, and dynamic constants into LLVM IR. - let llvm_types = generate_type_strings(module); - let llvm_constants = generate_constant_strings(module); - let llvm_dynamic_constants = generate_dynamic_constant_strings(module); - let type_sizes_aligns = (0..module.types.len()) - .map(|idx| { - if module.types[idx].is_control() { - (None, 0) - } else { - type_size_and_alignment(module, TypeID::new(idx)) - } - }) - .collect(); - - // Generate a dummy uninitialized global - this is needed so that there'll - // be a non-empty .bss section in the ELF object file. - write!(w, "@dummy = dso_local global i32 0, align 4\n")?; - - // Do codegen for each function individually. Get each function's manifest. - let mut manifests = vec![]; - for function_idx in 0..module.functions.len() { - // There's a bunch of per-function information we use. 
- let context = FunctionContext { - function: &module.functions[function_idx], - types: &module.types, - constants: &module.constants, - dynamic_constants: &module.dynamic_constants, - def_use: &def_uses[function_idx], - reverse_postorder: &reverse_postorders[function_idx], - typing: &typing[function_idx], - control_subgraph: &control_subgraphs[function_idx], - fork_join_map: &fork_join_maps[function_idx], - fork_join_nest: &fork_join_nests[function_idx], - antideps: &antideps[function_idx], - bbs: &bbs[function_idx], - plan: &plans[function_idx], - llvm_types: &llvm_types, - llvm_constants: &llvm_constants, - llvm_dynamic_constants: &llvm_dynamic_constants, - type_sizes_aligns: &type_sizes_aligns, - partitions_inverted_map: plans[function_idx].invert_partition_map(), - }; - - manifests.push(context.codegen_function(w)?); - } - - // Assemble the manifest for the whole module. - Ok(ModuleManifest { - functions: manifests, - types: module.types.clone(), - type_sizes_aligns, - dynamic_constants: module.dynamic_constants.clone(), - // Get the types of all of the constants. This requires collecting over - // all of the functions, since the calculated types of constants may be - // distributed over many functions. This may contain duplicate mappings, - // but this should be fine for our purposes, since the mappings - // shouldn't conflict. 
- constant_types: module - .functions - .iter() - .enumerate() - .map(|(func_idx, function)| { - function - .nodes - .iter() - .enumerate() - .filter_map(move |(idx, node)| { - Some((node.try_constant()?, typing[func_idx][idx])) - }) - }) - .flatten() - .collect(), - array_constants: (0..module.constants.len()) - .map(ConstantID::new) - .filter_map(|cons_id| { - if module.constants[cons_id.idx()] - .try_array_type(&module.types) - .is_some() - { - Some(embed_constant(module, cons_id)) - } else { - None - } - }) - .collect(), - array_cons_ids: (0..module.constants.len()) - .map(ConstantID::new) - .filter(|id| { - module.constants[id.idx()] - .try_array_type(&module.types) - .is_some() - }) - .collect(), - }) -} - -impl<'a> FunctionContext<'a> { - /* - * Each function gets codegened separately. - */ - fn codegen_function<W: Write>(&self, w: &mut W) -> Result<FunctionManifest, std::fmt::Error> { - // Find the "top" control node of each partition. One well-formedness - // condition of partitions is that there is exactly one "top" control - // node. - let top_nodes: Vec<NodeID> = self - .partitions_inverted_map - .iter() - .enumerate() - .map(|(part_idx, part)| { - // For each partition, find the "top" node. - *part - .iter() - .filter(move |id| { - // The "top" node is a control node having at least one - // control predecessor in another partition, or is a - // start node. Every predecessor in the control subgraph - // is a control node. - self.function.nodes[id.idx()].is_start() - || (self.function.nodes[id.idx()].is_control() - && self - .control_subgraph - .preds(**id) - .filter(|pred_id| { - self.plan.partitions[pred_id.idx()].idx() != part_idx - }) - .count() - > 0) - }) - .next() - .unwrap() - }) - .collect(); - - // Collect all the node IDs that are values returned by this function. 
- let returned_values = self - .function - .nodes - .iter() - .filter_map(|node| node.try_return().map(|(_, data)| data)) - .collect(); - - // Get the partition ID of the start node. - let top_partition = self.plan.partitions[0]; - - // Generate code for each individual partition. This generates a single - // LLVM function per partition. These functions will be called in async - // tasks by the Hercules runtime. - assert_eq!(self.plan.num_partitions, top_nodes.len()); - let mut manifests = vec![]; - for part_idx in 0..self.plan.num_partitions { - match self.plan.partition_devices[part_idx] { - Device::CPU => manifests.push(self.codegen_cpu_partition(top_nodes[part_idx], w)?), - Device::GPU => todo!(), - } - } - - // Assemble the manifest for the whole function. - Ok(FunctionManifest { - name: self.function.name.clone(), - param_types: self.function.param_types.clone(), - return_type: self.function.return_type, - typing: self.typing.clone(), - used_constants: self - .function - .nodes - .iter() - .filter_map(|node| node.try_constant()) - .collect(), - num_dynamic_constant_parameters: self.function.num_dynamic_constants, - partitions: manifests, - // TODO: populate dynamic constant rules. - dynamic_constant_rules: vec![], - top_partition, - returned_values, - }) - } -} diff --git a/hercules_ir/src/build.rs b/hercules_ir/src/build.rs index b451dcb8c3da2b7f0399a7613927357243e0b54a..cfc59a2b4ce5283fdc709371cba21ec17c2e0f0c 100644 --- a/hercules_ir/src/build.rs +++ b/hercules_ir/src/build.rs @@ -354,24 +354,14 @@ impl<'a> Builder<'a> { pub fn create_constant_array( &mut self, elem_ty: TypeID, - cons: Box<[ConstantID]>, extents: Box<[u32]>, ) -> BuilderResult<ConstantID> { - for con in cons.iter() { - if self.constant_types[con.idx()] != elem_ty { - Err("Constant provided to create_constant_array has a different type than the provided element type.")? 
- } - } let extents = extents .iter() .map(|extent| self.create_dynamic_constant_constant(*extent as usize)) .collect(); let ty = self.create_type_array(elem_ty, extents); - Ok(self.intern_constant(Constant::Array(ty, cons), ty)) - } - - pub fn create_constant_zero(&mut self, typ : TypeID) -> ConstantID { - self.intern_constant(Constant::Zero(typ), typ) + Ok(self.intern_constant(Constant::Array(ty), ty)) } pub fn create_dynamic_constant_constant(&mut self, val: usize) -> DynamicConstantID { diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index c7462613847ab73aad421aa105a8e4a81b449040..bde6be4ad8789b14c612e6e1f6341483faefff29 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -375,10 +375,9 @@ pub fn immediate_control_flow( .into_iter() .fold(UnionNodeSet::top(), |a, b| UnionNodeSet::meet(&a, b)); } - let node = &function.nodes[node_id.idx()]; // Step 2: clear all bits and set bit for current node, if applicable. - if node.is_control() { + if function.nodes[node_id.idx()].is_control() { let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()]; singular.set(node_id.idx(), true); out = UnionNodeSet::Bits(singular); diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 63053842406ed696ca68570323d8994aa904cac6..32f3a5787d1e273d6f393a95bc08b31b73038f8c 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -20,7 +20,9 @@ pub fn xdot_module( reverse_postorders: &Vec<Vec<NodeID>>, doms: Option<&Vec<DomTree>>, fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>, + bbs: Option<&Vec<Vec<NodeID>>>, plans: Option<&Vec<Plan>>, + fork_join_placements: Option<&Vec<Vec<ForkJoinPlacement>>>, ) { let mut tmp_path = temp_dir(); let mut rng = rand::thread_rng(); @@ -33,7 +35,9 @@ pub fn xdot_module( &reverse_postorders, doms, fork_join_maps, + bbs, plans, + fork_join_placements, &mut contents, ) .expect("PANIC: Unable to generate output file contents."); @@ -54,7 +58,9 @@ pub fn write_dot<W: Write>( 
reverse_postorders: &Vec<Vec<NodeID>>, doms: Option<&Vec<DomTree>>, fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>, + bbs: Option<&Vec<Vec<NodeID>>>, plans: Option<&Vec<Plan>>, + fork_join_placements: Option<&Vec<Vec<ForkJoinPlacement>>>, w: &mut W, ) -> std::fmt::Result { write_digraph_header(w)?; @@ -190,6 +196,66 @@ pub fn write_dot<W: Write>( } } + // Step 4: draw BB edges in light magenta. + if let Some(bbs) = bbs { + let bbs = &bbs[function_id.idx()]; + for node_idx in 0..bbs.len() { + let maybe_data = NodeID::new(node_idx); + let control = bbs[node_idx]; + if maybe_data != control { + write_edge( + maybe_data, + function_id, + control, + function_id, + true, + "olivedrab4, constraint=false", + "dotted", + &module, + w, + )?; + } + } + } + + // Step 5: draw fork-join placement edges in purple. + if let Some(fork_join_placements) = fork_join_placements { + let fork_join_map = &fork_join_maps.unwrap()[function_id.idx()]; + let fork_join_placement = &fork_join_placements[function_id.idx()]; + for node_idx in 0..fork_join_placement.len() { + let node_id = NodeID::new(node_idx); + match fork_join_placement[node_id.idx()] { + ForkJoinPlacement::Sequential => {} + ForkJoinPlacement::Fork(fork_id) => { + write_edge( + node_id, + function_id, + fork_id, + function_id, + true, + "purple, constraint=false", + "dotted", + &module, + w, + )?; + } + ForkJoinPlacement::Reduce(fork_id) => { + write_edge( + node_id, + function_id, + fork_join_map[&fork_id], + function_id, + true, + "purple, constraint=false", + "dotted", + &module, + w, + )?; + } + } + } + } + write_graph_footer(w)?; } diff --git a/hercules_ir/src/gcm.rs b/hercules_ir/src/gcm.rs index 60e7935852fea297d6cce4b86d42edbbf635228a..9da269885c84cf802eba561eb0aad9334ed21103 100644 --- a/hercules_ir/src/gcm.rs +++ b/hercules_ir/src/gcm.rs @@ -1,4 +1,8 @@ -use std::collections::HashMap; +extern crate bitvec; + +use std::collections::{HashMap, VecDeque}; + +use self::bitvec::prelude::*; use crate::*; @@ -54,7 
+58,7 @@ pub fn gcm( .unwrap_or(highest); // If the ancestor of the control users isn't below the lowest - // control use, then just place in the loewst control use. + // control use, then just place in the lowest control use. if !dom.does_dom(highest, lowest) { highest } else { @@ -76,6 +80,51 @@ pub fn gcm( } } + // If the assigned location is a join and this node doesn't use + // a reduce from that join, we actually want to place these + // nodes in the predecessor of the join, so that the code will + // get executed in parallel. + if let Some(control) = function.nodes[location.idx()].try_join() + && location != NodeID::new(idx) + { + // Set up BFS to find reduce nodes. + let mut bfs = VecDeque::new(); + let mut bfs_visited = bitvec![u8, Lsb0; 0; function.nodes.len()]; + bfs.push_back(NodeID::new(idx)); + bfs_visited.set(idx, true); + let mut found_reduce = false; + 'bfs: while let Some(id) = bfs.pop_front() { + for use_id in get_uses(&function.nodes[id.idx()]).as_ref() { + // If we find a reduce, check that it's attached to + // the join we care about + if let Some((join, _, _)) = function.nodes[use_id.idx()].try_reduce() + && join == location + { + found_reduce = true; + break 'bfs; + } + + // Only go through data nodes. + if bfs_visited[use_id.idx()] + || function.nodes[use_id.idx()].is_control() + { + continue; + } + + bfs.push_back(*use_id); + bfs_visited.set(use_id.idx(), true); + } + } + + // If we don't depend on the reduce, we're not in a cycle + // with the reduce. Therefore, we should be scheduled to the + // predecessor of the join, since this code can run in + // parallel. + if !found_reduce { + location = control; + } + } + location } }) @@ -104,10 +153,185 @@ pub fn compute_fork_join_nesting( ( id, dom.ascend(id) + // Filter for forks that dominate this control node, .filter(|id| function.nodes[id.idx()].is_fork()) + // where its corresponding join doesn't dominate the control + // node (if so, then this control is after the fork-join). 
.filter(|fork_id| !dom.does_prop_dom(fork_join_map[&fork_id], id)) .collect(), ) }) .collect() } + +/* + * Find all the reduce-cycles in a function. + */ +pub fn compute_reduce_cycles(function: &Function) -> HashMap<NodeID, Vec<NodeID>> { + let mut result = HashMap::new(); + let mut dfs_visited = bitvec![u8, Lsb0; 0; function.nodes.len()]; + + for id in (0..function.nodes.len()).map(NodeID::new) { + if let Node::Reduce { + control: _, + init: _, + reduct, + } = &function.nodes[id.idx()] + { + // DFS to find data cycle "rooted" at reduce. + dfs_visited.fill(false); + dfs_visited.set(id.idx(), true); + // The stack starts with the reduce node itself and the `reduct` use + // of the reduce node. + let mut dfs_stack = vec![(id, 0), (*reduct, 0)]; + 'dfs: while let Some((node_id, use_idx)) = dfs_stack.pop() { + if node_id == id { + // If we returned to the reduce node, then there is no + // cycle. This will be signified by any empty vector in the + // return map. + break; + } + + dfs_visited.set(node_id.idx(), true); + + // If there are further uses... + let uses = get_uses(&function.nodes[node_id.idx()]); + if use_idx < uses.as_ref().len() { + // Push ourselves back on to the stack. + dfs_stack.push((node_id, use_idx + 1)); + + // Check if the use is a data node. + let use_id = uses.as_ref()[use_idx]; + if !function.nodes[use_id.idx()].is_control() { + // If so, check if the next use was already visited. + if !dfs_visited[use_id.idx()] { + // If not, add the use to the stack. + dfs_stack.push((use_id, 0)); + } else if dfs_stack.iter().any(|(id, _)| *id == use_id) { + // If so, and the use is already in the stack, we've + // found a cycle - if the already visited node we + // found isn't the reduce, then there's a cycle not + // involving the reduce, which isn't valid. + assert_eq!( + id, use_id, + "PANIC: Found cycle not containing expected reduce node." 
+ ); + break 'dfs; + } + } + } + } + + result.insert(id, dfs_stack.into_iter().map(|(id, _)| id).collect()); + } + } + + result +} + +pub fn invert_reduce_cycles( + function: &Function, + reduce_cycles: &HashMap<NodeID, Vec<NodeID>>, + join_fork_map: &HashMap<NodeID, NodeID>, + fork_join_nest: &HashMap<NodeID, Vec<NodeID>>, +) -> Vec<Option<NodeID>> { + let mut result: Vec<Option<NodeID>> = vec![None; function.nodes.len()]; + + for (reduce, in_cycle) in reduce_cycles { + for node in in_cycle { + if let Some(old_reduce) = result[node.idx()] { + // A node may be in multiple reduce cycles when there are nested + // fork-joins. In such cases, we pick the more "deeply nested" + // reduce cycle. + let old_join_id = function.nodes[old_reduce.idx()].try_reduce().unwrap().0; + let old_fork_id = join_fork_map[&old_join_id]; + let new_join_id = function.nodes[reduce.idx()].try_reduce().unwrap().0; + let new_fork_id = join_fork_map[&new_join_id]; + + let old_above_new = fork_join_nest[&new_fork_id].contains(&old_fork_id); + let new_above_old = fork_join_nest[&old_fork_id].contains(&new_fork_id); + assert!(old_above_new ^ new_above_old, "PANIC: A node can only be in reduce cycles that are hierarchically related and from different fork-joins."); + if old_above_new { + result[node.idx()] = Some(*reduce); + } + } else { + result[node.idx()] = Some(*reduce); + } + } + } + + result +} + +/* + * Description of a node's placement amongst fork-joins, generated per-node by + * `compute_fork_join_placements`. + */ +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ForkJoinPlacement { + // The node is not "in" any fork-joins. + Sequential, + // The node is in the "fork" section of the fork/join, marked by fork ID. + Fork(NodeID), + // The node is in the "reduce" section of the fork/join, marked by fork ID. + Reduce(NodeID), +} + +/* + * Find which fork/join each data node is "inside" of, based off of basic block + * scheduling (global code motion information). 
A data node is either not a part + * of any fork/join, is a part of the "fork" section of a fork/join, or is a + * part of the "reduce" section of a fork/join. The following conditions are + * applied in order to determine which category a data node is in: + * 1. If a data node is contained in a cycle containing a reduce node OR is + * scheduled to the basic block of the join node of the fork/join, the data + * node is in the "reduce" section of that fork/join (more specifically, the + * most deeply nested such reduce node). Otherwise... + * 2. If a data node is scheduled to a control node inside a fork/join, the data + * node is in the "fork" section of that fork/join (more specifically, the + * most deeply nested such fork/join). Otherwise... + * 3. If a data node is not in a "reduce" or "fork" section of any fork/join, it + * is a "sequential" node. + */ +pub fn compute_fork_join_placement( + function: &Function, + fork_join_map: &HashMap<NodeID, NodeID>, + fork_join_nest: &HashMap<NodeID, Vec<NodeID>>, + bbs: &Vec<NodeID>, +) -> Vec<ForkJoinPlacement> { + let mut result = vec![ForkJoinPlacement::Sequential; function.nodes.len()]; + let join_fork_map = fork_join_map + .into_iter() + .map(|(fork, join)| (*join, *fork)) + .collect::<HashMap<_, _>>(); + let reduce_cycles = compute_reduce_cycles(function); + let inverted_reduce_cycles = + invert_reduce_cycles(function, &reduce_cycles, &join_fork_map, fork_join_nest); + + for id in (0..function.nodes.len()).map(NodeID::new) { + // Check condition #1. + if let Some(reduce_id) = &inverted_reduce_cycles[id.idx()] { + let join_id = function.nodes[reduce_id.idx()].try_reduce().unwrap().0; + let fork_id = join_fork_map[&join_id]; + result[id.idx()] = ForkJoinPlacement::Reduce(fork_id); + continue; + } + + if let Some(fork_id) = join_fork_map.get(&bbs[id.idx()]) { + result[id.idx()] = ForkJoinPlacement::Reduce(*fork_id); + continue; + } + + // Check condition #2. 
+ let forks = &fork_join_nest[&bbs[id.idx()]]; + if let Some(fork_id) = forks.get(0) { + result[id.idx()] = ForkJoinPlacement::Fork(*fork_id); + continue; + } + + // Default to condition #3. + result[id.idx()] = ForkJoinPlacement::Sequential; + } + + result +} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index ea9c07205029717d700700e8b97db76cd07e989f..688902b6be01c478202adcadac1e94ce5de0e304 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -31,9 +31,9 @@ pub struct Module { * A function has a name, a list of types for its parameters, a single return * type, a list of nodes in its sea-of-nodes style IR, and a number of dynamic * constants. When calling a function, arguments matching the parameter types - * are required, as well as the correct number of dynamic constants. All - * dynamic constants are 64-bit unsigned integers (usize / u64), so it is - * sufficient to merely store how many of them the function takes as arguments. + * are required, as well as the correct number of dynamic constants. All dynamic + * constants are 64-bit unsigned integers (usize / u64), so it is sufficient to + * just store how many of them the function takes as arguments. */ #[derive(Debug, Clone)] pub struct Function { @@ -78,11 +78,7 @@ pub enum Type { * Constants are pretty standard in Hercules IR. Float constants used the * ordered_float crate so that constants can be keys in maps (used for * interning constants during IR construction). Product, summation, and array - * constants all contain their own type. This is only strictly necessary for - * summation types, but provides a nice mechanism for sanity checking for - * product and array types as well. There is also a zero initializer constant, - * which stores its own type as well. The zero value of a summation is defined - * as the zero value of the first variant. + * constants all contain their own type. 
*/ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Constant { @@ -99,8 +95,8 @@ pub enum Constant { Float64(ordered_float::OrderedFloat<f64>), Product(TypeID, Box<[ConstantID]>), Summation(TypeID, u32, ConstantID), - Array(TypeID, Box<[ConstantID]>), - Zero(TypeID), + // Array constants are always zero. + Array(TypeID), } /* @@ -345,18 +341,9 @@ impl Module { self.write_constant(*field, w)?; write!(w, ")") } - Constant::Array(_, elems) => { - write!(w, "[")?; - for idx in 0..elems.len() { - let elem_cons_id = elems[idx]; - self.write_constant(elem_cons_id, w)?; - if idx + 1 < elems.len() { - write!(w, ", ")?; - } - } - write!(w, "]") + Constant::Array(_) => { + write!(w, "[]") } - Constant::Zero(_) => write!(w, "zero"), }?; Ok(()) @@ -374,18 +361,6 @@ impl Module { Ok(()) } - - /* - * Unfortunately, determining if a constant is an array requires both - * knowledge of constants and types, due to zero initializer constants. - */ - pub fn is_array_constant(&self, cons_id: ConstantID) -> bool { - if let Constant::Zero(ty_id) = self.constants[cons_id.idx()] { - self.types[ty_id.idx()].is_array() - } else { - self.constants[cons_id.idx()].is_strictly_array() - } - } } /* @@ -463,7 +438,7 @@ pub fn constants_bottom_up(constants: &Vec<Constant>) -> impl Iterator<Item = Co continue; } match &constants[id.idx()] { - Constant::Product(_, children) | Constant::Array(_, children) => { + Constant::Product(_, children) => { // We have to yield the children of this node before // this node itself. We keep track of which nodes have // yielded using visited. 
@@ -693,16 +668,9 @@ impl Type { } } -pub fn element_type(mut ty: TypeID, types: &Vec<Type>) -> TypeID { - while let Type::Array(elem, _) = types[ty.idx()] { - ty = elem; - } - ty -} - impl Constant { - pub fn is_strictly_array(&self) -> bool { - if let Constant::Array(_, _) = self { + pub fn is_array(&self) -> bool { + if let Constant::Array(_) = self { true } else { false @@ -711,66 +679,23 @@ impl Constant { // A zero constant may need to return constants that don't exist yet, so we // need mutable access to the constants array. - pub fn try_product_fields( - &self, - types: &[Type], - constants: &mut Vec<Constant>, - ) -> Option<Vec<ConstantID>> { + pub fn try_product_fields(&self) -> Option<Vec<ConstantID>> { match self { Constant::Product(_, fields) => Some(fields.iter().map(|x| *x).collect()), - Constant::Zero(ty) => match types[ty.idx()] { - Type::Product(ref fields) => Some( - fields - .iter() - .map(|field_ty| { - let field_constant = Constant::Zero(*field_ty); - if let Some(idx) = constants - .iter() - .position(|constant| *constant == field_constant) - { - ConstantID::new(idx) - } else { - let id = ConstantID::new(constants.len()); - constants.push(field_constant); - id - } - }) - .collect(), - ), - _ => None, - }, _ => None, } } - pub fn try_array_type(&self, types: &[Type]) -> Option<TypeID> { - // Need types, since zero initializer may be for a collection type, or - // not. + pub fn try_array_type(&self) -> Option<TypeID> { match self { - Constant::Array(ty, _) => Some(*ty), - Constant::Zero(ty) => { - if types[ty.idx()].is_array() { - Some(*ty) - } else { - None - } - } + Constant::Array(ty) => Some(*ty), _ => None, } } - pub fn try_product_type(&self, types: &[Type]) -> Option<TypeID> { - // Need types, since zero initializer may be for a collection type, or - // not. 
+ pub fn try_product_type(&self) -> Option<TypeID> { match self { Constant::Product(ty, _) => Some(*ty), - Constant::Zero(ty) => { - if types[ty.idx()].is_product() { - Some(*ty) - } else { - None - } - } _ => None, } } @@ -807,7 +732,6 @@ impl Constant { Constant::UnsignedInteger64(0) => true, Constant::Float32(ord) => *ord == ordered_float::OrderedFloat::<f32>(0.0), Constant::Float64(ord) => *ord == ordered_float::OrderedFloat::<f64>(0.0), - Constant::Zero(_) => true, _ => false, } } @@ -838,6 +762,22 @@ impl DynamicConstant { } } + pub fn is_constant(&self) -> bool { + if let DynamicConstant::Constant(_) = self { + true + } else { + false + } + } + + pub fn try_parameter(&self) -> Option<usize> { + if let DynamicConstant::Parameter(v) = self { + Some(*v) + } else { + None + } + } + pub fn try_constant(&self) -> Option<usize> { if let DynamicConstant::Constant(v) = self { Some(*v) @@ -863,6 +803,8 @@ macro_rules! define_pattern_predicate { } impl Index { + define_pattern_predicate!(is_field, Index::Field(_)); + define_pattern_predicate!(is_position, Index::Position(_)); pub fn try_field(&self) -> Option<usize> { if let Index::Field(field) = self { @@ -989,6 +931,43 @@ impl Node { } } + pub fn try_fork(&self) -> Option<(NodeID, DynamicConstantID)> { + if let Node::Fork { control, factor } = self { + Some((*control, *factor)) + } else { + None + } + } + + pub fn try_thread_id(&self) -> Option<NodeID> { + if let Node::ThreadID { control } = self { + Some(*control) + } else { + None + } + } + + pub fn try_join(&self) -> Option<NodeID> { + if let Node::Join { control } = self { + Some(*control) + } else { + None + } + } + + pub fn try_reduce(&self) -> Option<(NodeID, NodeID, NodeID)> { + if let Node::Reduce { + control, + init, + reduct, + } = self + { + Some((*control, *init, *reduct)) + } else { + None + } + } + pub fn try_constant(&self) -> Option<ConstantID> { if let Node::Constant { id } = self { Some(*id) @@ -1285,265 +1264,34 @@ macro_rules! 
define_id_type { }; } -define_id_type!(FunctionID); -define_id_type!(NodeID); -define_id_type!(TypeID); -define_id_type!(ConstantID); -define_id_type!(DynamicConstantID); - -/* - * Sometimes, it's useful to debug print out a module. This code prints out a - * module in (approximately) the same textual format as is parsed in parse.rs. - */ -use std::fmt::Display; -use std::fmt::Formatter; - -impl Display for Module { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - for func in self.functions.iter() { - func.ir_fmt(f, self)?; - write!(f, "\n")?; - } - Ok(()) - } -} - -/* - * When printing out objects in a module, we may need to refer back (upwards) to - * other objects in the module. Display doesn't let us do that, so we make our - * own trait. - */ -trait IRDisplay { - fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result; -} - -impl IRDisplay for Function { - fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result { - write!(f, "fn {}<{}>(", self.name, self.num_dynamic_constants)?; - - for (idx, typ) in self.param_types.iter().enumerate() { - write!(f, "arg_{} : ", idx)?; - module.write_type(*typ, f)?; - if idx + 1 < self.param_types.len() { - write!(f, ", ")?; - } - } - - write!(f, ") -> ")?; - module.write_type(self.return_type, f)?; - - write!(f, "\n")?; - - for (idx, node) in self.nodes.iter().enumerate() { - write!(f, "\tvar_{} = ", idx)?; - node.ir_fmt(f, module)?; - write!(f, "\n")?; - } - - Ok(()) - } -} - -impl IRDisplay for Node { - fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result { - match self { - Node::Start => { - write!(f, "start") - } - Node::Region { preds } => { - write!(f, "region(")?; - for (idx, pred) in preds.iter().enumerate() { - write!(f, "var_{}", pred.0)?; - if idx + 1 < preds.len() { - write!(f, ", ")?; - } - } - write!(f, ")") - } - Node::If { control, cond } => { - write!(f, "if(var_{}, var_{})", control.0, cond.0) - } - Node::Match { control, sum } 
=> { - write!(f, "match(var_{}, var_{})", control.0, sum.0) - } - Node::Fork { control, factor } => { - write!(f, "fork(var_{}, ", control.0)?; - module.write_dynamic_constant(*factor, f)?; - write!(f, ")") - } - Node::Join { control } => { - write!(f, "join(var_{})", control.0) - } - Node::Phi { control, data } => { - write!(f, "phi(var_{}", control.0)?; - for val in data.iter() { - write!(f, ", var_{}", val.0)?; - } - write!(f, ")") - } - Node::ThreadID { control } => { - write!(f, "thread_id(var_{})", control.0) - } - Node::Reduce { - control, - init, - reduct, - } => { - write!( - f, - "reduce(var_{}, var_{}, var_{})", - control.0, init.0, reduct.0 - ) - } - Node::Return { control, data } => { - write!(f, "return(var_{}, var_{})", control.0, data.0) - } - Node::Parameter { index } => { - write!(f, "arg_{}", index) - } - Node::Constant { id } => { - write!(f, "constant(")?; - module.constants[id.idx()].ir_fmt(f, module)?; - write!(f, ")") - } - Node::DynamicConstant { id } => { - write!(f, "dynamic_constant(")?; - module.write_dynamic_constant(*id, f)?; - write!(f, ")") - } - Node::Unary { input, op } => { - write!(f, "{}(var_{})", op.lower_case_name(), input.0) - } - Node::Binary { left, right, op } => { - write!( - f, - "{}(var_{}, var_{})", - op.lower_case_name(), - left.0, - right.0 - ) - } - Node::Call { - function, - dynamic_constants, - args, - } => { - write!(f, "call<")?; - for (idx, dyn_const) in dynamic_constants.iter().enumerate() { - module.write_dynamic_constant(*dyn_const, f)?; - if idx + 1 < dynamic_constants.len() { - write!(f, ", ")?; - } - } - write!(f, ">({}", module.functions[function.0 as usize].name)?; - for arg in args.iter() { - write!(f, ", var_{}", arg.0)?; - } - write!(f, ")") - } - Node::Read { collect, indices } => { - write!(f, "read(var_{}", collect.0)?; - for idx in indices.iter() { - write!(f, ", ")?; - idx.ir_fmt(f, module)?; - } - write!(f, ")") - } - Node::Write { - collect, - data, - indices, - } => { - write!(f, 
"write(var_{}, var_{}", collect.0, data.0)?; - for idx in indices.iter() { - write!(f, ", ")?; - idx.ir_fmt(f, module)?; - } - write!(f, ")") - } - Node::Ternary { - first, - second, - third, - op, - } => { - write!( - f, - "{}(var_{}, var_{}, var_{})", - op.lower_case_name(), - first.0, - second.0, - third.0 - ) - } - Node::Projection { control, selection } => { - write!(f, "projection({}, {})", control.0, selection) - } - } - } -} +#[macro_export] +macro_rules! define_dual_id_type { + ($x: ident) => { + #[derive( + Debug, + Default, + Clone, + Copy, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + serde::Serialize, + serde::Deserialize, + )] + pub struct $x(u32, u32); -impl IRDisplay for Index { - fn ir_fmt(&self, f: &mut Formatter<'_>, _module: &Module) -> std::fmt::Result { - match self { - Index::Field(idx) => write!(f, "field({})", idx), - Index::Variant(idx) => write!(f, "variant({})", idx), - Index::Position(indices) => { - write!(f, "position(")?; - for (i, idx) in indices.iter().enumerate() { - write!(f, "var_{}", idx.0)?; - if i + 1 < indices.len() { - write!(f, ", ")?; - } - } - write!(f, ")") + impl $x { + pub fn new(x: usize, y: usize) -> Self { + $x(x as u32, y as u32) } } - } + }; } -impl IRDisplay for Constant { - fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result { - match self { - Constant::Boolean(v) => write!(f, "{} : bool", v), - Constant::Integer8(v) => write!(f, "{} : i8", v), - Constant::Integer16(v) => write!(f, "{} : i16", v), - Constant::Integer32(v) => write!(f, "{} : i32", v), - Constant::Integer64(v) => write!(f, "{} : i64", v), - Constant::UnsignedInteger8(v) => write!(f, "{} : u8", v), - Constant::UnsignedInteger16(v) => write!(f, "{} : u16", v), - Constant::UnsignedInteger32(v) => write!(f, "{} : u32", v), - Constant::UnsignedInteger64(v) => write!(f, "{} : u64", v), - Constant::Float32(v) => write!(f, "{} : f32", v), - Constant::Float64(v) => write!(f, "{} : f64", v), - Constant::Product(t, cnsts) => { - 
write!(f, "(")?; - for i in 0..cnsts.len() { - module.constants[cnsts[i].idx()].ir_fmt(f, module)?; - write!(f, ", ")?; - } - write!(f, ") :")?; - module.write_type(*t, f) - } - Constant::Summation(t, tag, cnst) => { - write!(f, "{}(", tag)?; - module.constants[cnst.idx()].ir_fmt(f, module)?; - write!(f, ") : ")?; - module.write_type(*t, f) - } - Constant::Array(t, cnsts) => { - write!(f, "{{")?; - for i in 0..cnsts.len() { - module.constants[cnsts[i].idx()].ir_fmt(f, module)?; - write!(f, ", ")?; - } - write!(f, "}} : ")?; - module.write_type(*t, f) - } - Constant::Zero(t) => { - write!(f, "zero : ")?; - module.write_type(*t, f) - } - } - } -} +define_id_type!(FunctionID); +define_id_type!(NodeID); +define_id_type!(TypeID); +define_id_type!(ConstantID); +define_id_type!(DynamicConstantID); diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index fc5e397bf4a6848d6e91969779f90906dbaa2021..70eb270ca7d02827b1ef5f21b739ea643d4836b8 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -221,7 +221,7 @@ fn parse_function<'a>( let (ir_text, return_type) = parse_type_id(ir_text, context)?; let (ir_text, nodes) = nom::multi::many1(|x| parse_node(x, context))(ir_text)?; - // nodes, as returned by parsing, is in parse order, which may differ from + // `nodes`, as returned by parsing, is in parse order, which may differ from // the order dictated by NodeIDs in the node name intern map. 
let mut fixed_nodes = vec![Node::Start; context.borrow().node_ids.len()]; for (name, node) in nodes { @@ -270,7 +270,18 @@ fn parse_node<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, (&'a str, Node)> { - let ir_text = nom::character::complete::multispace0(ir_text)?.0; + let mut ir_text = nom::character::complete::multispace0(ir_text)?.0; + if let Ok((comment_ir_text, _)) = + nom::character::complete::char::<&'a str, (&'a str, _)>('#')(ir_text) + { + let comment_ir_text = + nom::bytes::complete::take_while(|c| !nom::character::is_newline(c as u8))( + comment_ir_text, + )? + .0; + let comment_ir_text = nom::character::complete::line_ending(comment_ir_text)?.0; + ir_text = nom::character::complete::multispace0(comment_ir_text)?.0; + } let (ir_text, node_name) = parse_identifier(ir_text)?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('=')(ir_text)?.0; @@ -718,7 +729,6 @@ fn parse_match<'a>( } fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Type> { - // Parser combinators are very convenient, if a bit hard to read. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, ty) = nom::branch::alt(( // Control tokens are parameterized by a list of dynamic constants @@ -893,15 +903,6 @@ fn parse_constant<'a>( ty: Type, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Constant> { - let ty_id = context.borrow_mut().get_type_id(ty.clone()); - let (ir_text, maybe_constant) = nom::combinator::opt(nom::combinator::map( - nom::bytes::complete::tag("zero"), - |_| Constant::Zero(ty_id), - ))(ir_text)?; - if let Some(cons) = maybe_constant { - return Ok((ir_text, cons)); - } - let (ir_text, constant) = match ty { // There are not control constants. 
Type::Control(_) => Err(nom::Err::Error(nom::error::Error { @@ -931,12 +932,9 @@ fn parse_constant<'a>( tys, context, )?, - Type::Array(elem_ty, _) => parse_array_constant( - ir_text, - context.borrow_mut().get_type_id(ty.clone()), - elem_ty, - context, - )?, + Type::Array(_, _) => { + parse_array_constant(ir_text, context.borrow_mut().get_type_id(ty.clone()))? + } }; Ok((ir_text, constant)) } @@ -1086,42 +1084,12 @@ fn parse_summation_constant<'a>( Ok((ir_text, Constant::Summation(sum_ty, variant, id))) } -fn parse_array_constant<'a>( - ir_text: &'a str, - array_ty: TypeID, - elem_ty: TypeID, - context: &RefCell<Context<'a>>, -) -> nom::IResult<&'a str, Constant> { +fn parse_array_constant<'a>(ir_text: &'a str, array_ty: TypeID) -> nom::IResult<&'a str, Constant> { let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('[')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; - let (ir_text, entries) = nom::multi::separated_list1( - nom::sequence::tuple(( - nom::character::complete::multispace0, - nom::character::complete::char(','), - nom::character::complete::multispace0, - )), - |x| { - parse_constant_id( - x, - context - .borrow() - .reverse_type_map - .get(&elem_ty) - .unwrap() - .clone(), - context, - ) - }, - )(ir_text)?; - let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(']')(ir_text)?.0; - - // Will check that entries is the correct size during typechecking. 
- Ok(( - ir_text, - Constant::Array(array_ty, entries.into_boxed_slice()), - )) + Ok((ir_text, Constant::Array(array_ty))) } fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { diff --git a/hercules_ir/src/schedule.rs b/hercules_ir/src/schedule.rs index 3a240a250d8a94cd5190b6cb2f1873f7ca5cd2a4..fb839b5c4a469e8f6a9b8a718f361f959e0c85e7 100644 --- a/hercules_ir/src/schedule.rs +++ b/hercules_ir/src/schedule.rs @@ -171,6 +171,274 @@ impl Plan { num_partitions, } } + + /* + * Verify that a partitioning is valid. + */ + pub fn verify_partitioning( + &self, + function: &Function, + def_use: &ImmutableDefUseMap, + fork_join_map: &HashMap<NodeID, NodeID>, + ) { + let partition_to_node_ids = self.invert_partition_map(); + + // First, verify that there is at most one control node in the partition + // with a control use outside the partition. A partition may only have + // zero such control nodes if it contains the start node. This also + // checks that each partition has at least one control node. + for nodes_in_partition in partition_to_node_ids.iter() { + let contains_start = nodes_in_partition + .iter() + .any(|id| function.nodes[id.idx()] == Node::Start); + let num_inter_partition_control_uses = nodes_in_partition + .iter() + .filter(|id| { + // An inter-partition control use is a control node, + function.nodes[id.idx()].is_control() + // where one of its uses, + && get_uses(&function.nodes[id.idx()]) + .as_ref() + .into_iter() + .any(|use_id| { + // that is itself a control node as well, + function.nodes[use_id.idx()].is_control() + // is in a different partition. + && self.partitions[use_id.idx()] != self.partitions[id.idx()] + }) + }) + .count(); + + assert!(num_inter_partition_control_uses + contains_start as usize == 1, "PANIC: Found an invalid partition based on the inter-partition control use criteria."); + } + + // Second, verify that fork-joins are not split amongst partitions. 
+ for id in (0..function.nodes.len()).map(NodeID::new) { + if function.nodes[id.idx()].is_fork() { + let fork_part = self.partitions[id.idx()]; + + // The join must be in the same partition. + let join = fork_join_map[&id]; + assert_eq!( + fork_part, + self.partitions[join.idx()], + "PANIC: Join is in a different partition than its corresponding fork." + ); + + // The thread IDs must be in the same partition. + def_use + .get_users(id) + .into_iter() + .filter(|user| function.nodes[user.idx()].is_thread_id()) + .for_each(|thread_id| { + assert_eq!( + fork_part, + self.partitions[thread_id.idx()], + "PANIC: Thread ID is in a different partition than its fork use." + ) + }); + + // The reduces must be in the same partition. + def_use + .get_users(join) + .into_iter() + .filter(|user| function.nodes[user.idx()].is_reduce()) + .for_each(|reduce| { + assert_eq!( + fork_part, + self.partitions[reduce.idx()], + "PANIC: Reduce is in a different partition than its join use." + ) + }); + } + } + + // Third, verify that every data node has proper dominance relations + // with respect to the partitioning. In particular: + // 1. Every non-phi data node should be in a partition that is dominated + // by the partitions of every one of its uses. + // 2. Every data node should be in a partition that dominates the + // partitions of every one of its non-phi users. + // Compute a dominance relation between the partitions by constructing a + // partition control graph. + let partition_graph = partition_graph(function, def_use, self); + let dom = dominator(&partition_graph, NodeID::new(self.partitions[0].idx())); + for id in (0..function.nodes.len()).map(NodeID::new) { + let part = self.partitions[id.idx()]; + + // Check condition #1. 
+ if !function.nodes[id.idx()].is_phi() { + let uses = get_uses(&function.nodes[id.idx()]); + for use_id in uses.as_ref() { + let use_part = self.partitions[use_id.idx()]; + assert!(dom.does_dom(NodeID::new(use_part.idx()), NodeID::new(part.idx())), "PANIC: A data node has a partition use that doesn't dominate its partition."); + } + } + + // Check condition #2. + let users = def_use.get_users(id); + for user_id in users.as_ref() { + if !function.nodes[user_id.idx()].is_phi() { + let user_part = self.partitions[user_id.idx()]; + assert!(dom.does_dom(NodeID::new(part.idx()), NodeID::new(user_part.idx())), "PANIC: A data node has a partition user that isn't dominated by its partition."); + } + } + } + + // Fourth, verify that every partition has at least one partition + // successor xor has at least one return node. + for partition_idx in 0..self.num_partitions { + let has_successor = partition_graph.succs(NodeID::new(partition_idx)).count() > 0; + let has_return = partition_to_node_ids[partition_idx] + .iter() + .any(|node_id| function.nodes[node_id.idx()].is_return()); + assert!(has_successor ^ has_return, "PANIC: Found an invalid partition based on the partition return / control criteria."); + } + } + + /* + * Compute the top node for each partition. + */ + pub fn compute_top_nodes( + &self, + function: &Function, + control_subgraph: &Subgraph, + inverted_partition_map: &Vec<Vec<NodeID>>, + ) -> Vec<NodeID> { + inverted_partition_map + .into_iter() + .enumerate() + .map(|(part_idx, part)| { + // For each partition, find the "top" node. + *part + .iter() + .filter(move |id| { + // The "top" node is a control node having at least one + // control predecessor in another partition, or is a + // start node. Every predecessor in the control subgraph + // is a control node. 
+ function.nodes[id.idx()].is_start() + || (function.nodes[id.idx()].is_control() + && control_subgraph + .preds(**id) + .filter(|pred_id| { + self.partitions[pred_id.idx()].idx() != part_idx + }) + .count() + > 0) + }) + // We assume here there is exactly one such top node per + // partition. Verify a partitioning with + // `verify_partitioning` before calling this method. + .next() + .unwrap() + }) + .collect() + } + + /* + * Compute the data inputs of each partition. + */ + pub fn compute_data_inputs(&self, function: &Function) -> Vec<Vec<NodeID>> { + let mut data_inputs = vec![vec![]; self.num_partitions]; + + // First consider the non-phi nodes in each partition. + for id in (0..function.nodes.len()).map(NodeID::new) { + if function.nodes[id.idx()].is_phi() { + continue; + } + + let data_inputs = &mut data_inputs[self.partitions[id.idx()].idx()]; + let uses = get_uses(&function.nodes[id.idx()]); + for use_id in uses.as_ref() { + // For every non-phi node, check each of its data uses. If the + // node and its use are in different partitions, then the use is + // a data input for the partition of the node. Also, don't add + // the same node to the data inputs list twice. + if !function.nodes[use_id.idx()].is_control() + && self.partitions[id.idx()] != self.partitions[use_id.idx()] + && !data_inputs.contains(use_id) + { + data_inputs.push(*use_id); + } + } + } + + // Second consider the phi nodes in each partition. + for id in (0..function.nodes.len()).map(NodeID::new) { + if !function.nodes[id.idx()].is_phi() { + continue; + } + + let data_inputs = &mut data_inputs[self.partitions[id.idx()].idx()]; + let uses = get_uses(&function.nodes[id.idx()]); + for use_id in uses.as_ref() { + // For every phi node, if any one of its uses is defined in a + // different partition, then the phi node itself, not its + // outside uses, is considered a data input. 
This is because a + // phi node whose uses are all in a different partition should + // be lowered to a single parameter to the corresponding simple + // IR function. Note that for a phi node with some uses outside + // and some uses inside the partition, the uses outside the + // partition become a single parameter to the simple IR + // function, and that parameter and all of the "inside" uses + // become the inputs to a phi inside the simple IR function. + if self.partitions[id.idx()] != self.partitions[use_id.idx()] + && !data_inputs.contains(&id) + { + data_inputs.push(id); + break; + } + } + } + + // Sort the node IDs to keep a consistent interface between partitions. + for data_inputs in &mut data_inputs { + data_inputs.sort(); + } + data_inputs + } + + /* + * Compute the data outputs of each partition. + */ + pub fn compute_data_outputs( + &self, + function: &Function, + def_use: &ImmutableDefUseMap, + ) -> Vec<Vec<NodeID>> { + let mut data_outputs = vec![vec![]; self.num_partitions]; + + for id in (0..function.nodes.len()).map(NodeID::new) { + if function.nodes[id.idx()].is_control() { + continue; + } + + let data_outputs = &mut data_outputs[self.partitions[id.idx()].idx()]; + let users = def_use.get_users(id); + for user_id in users.as_ref() { + // For every data node, check each of its users. If the node and + // its user are in different partitions, then the node is a data + // output for the partition of the node. Also, don't add the + // same node to the data outputs list twice. It doesn't matter + // how this data node is being used - all that matters is that + // it itself is a data node, and that it has a user outside the + // partition. This makes the code simpler than the inputs case. + if self.partitions[id.idx()] != self.partitions[user_id.idx()] + && !data_outputs.contains(&id) + { + data_outputs.push(id); + break; + } + } + } + + // Sort the node IDs to keep a consistent interface between partitions. 
+ for data_outputs in &mut data_outputs { + data_outputs.sort(); + } + data_outputs + } } /* diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index 6d76f6fcc6b63cc97bfaf468f551836339a89656..62ef123e7528cf5526de59017cb31503113456c5 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; /* * In various parts of the compiler, we want to consider a subset of a complete - * function graph. For example, for dominators, we often only want to find the + * function graph. For example, for dominators, we often want to find the * dominator tree of only the control subgraph. */ #[derive(Debug, Clone)] @@ -245,3 +245,72 @@ pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Su function.nodes[node.idx()].is_control() }) } + +/* + * Construct a subgraph representing the control relations between partitions. + * Technically, this isn't a "sub"graph of the function graph, since partition + * nodes don't correspond to nodes in the original function. + */ +pub fn partition_graph(function: &Function, def_use: &ImmutableDefUseMap, plan: &Plan) -> Subgraph { + let partition_to_node_ids = plan.invert_partition_map(); + + let mut subgraph = Subgraph { + nodes: (0..plan.num_partitions).map(NodeID::new).collect(), + node_numbers: (0..plan.num_partitions) + .map(|idx| (NodeID::new(idx), idx as u32)) + .collect(), + first_forward_edges: vec![], + forward_edges: vec![], + first_backward_edges: vec![], + backward_edges: vec![], + original_num_nodes: plan.num_partitions as u32, + }; + + // Step 1: collect backward edges from use info. + for partition in partition_to_node_ids.iter() { + // Record the source of the edges (the current partition). + let old_num_edges = subgraph.backward_edges.len(); + subgraph.first_backward_edges.push(old_num_edges as u32); + for node in partition.iter() { + // Look at all the uses from nodes in that partition. 
+            let uses = get_uses(&function.nodes[node.idx()]);
+            for use_id in uses.as_ref() {
+                // Add a backward edge to any different partition we are using
+                // and don't add duplicate backward edges.
+                if plan.partitions[use_id.idx()] != plan.partitions[node.idx()]
+                    && !subgraph.backward_edges[old_num_edges..]
+                        .contains(&(plan.partitions[use_id.idx()].idx() as u32))
+                {
+                    subgraph
+                        .backward_edges
+                        .push(plan.partitions[use_id.idx()].idx() as u32);
+                }
+            }
+        }
+    }
+
+    // Step 2: collect forward edges from user (def_use) info.
+    for partition in partition_to_node_ids.iter() {
+        // Record the source of the edges (the current partition).
+        let old_num_edges = subgraph.forward_edges.len();
+        subgraph.first_forward_edges.push(old_num_edges as u32);
+        for node in partition.iter() {
+            // Look at all the users of nodes in that partition.
+            let users = def_use.get_users(*node);
+            for user_id in users.as_ref() {
+                // Add a forward edge to any different partition that we are a
+                // user of and don't add duplicate forward edges.
+                if plan.partitions[user_id.idx()] != plan.partitions[node.idx()]
+                    && !subgraph.forward_edges[old_num_edges..]
+                        .contains(&(plan.partitions[user_id.idx()].idx() as u32))
+                {
+                    subgraph
+                        .forward_edges
+                        .push(plan.partitions[user_id.idx()].idx() as u32);
+                }
+            }
+        }
+    }
+
+    subgraph
+}
diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs
index f2e4dbaa9f85123eecaa7972181ea775fab2c74b..140edfe02a4efb7be355ddcf06d75512adc71bd3 100644
--- a/hercules_ir/src/typecheck.rs
+++ b/hercules_ir/src/typecheck.rs
@@ -557,21 +557,8 @@ fn typeflow(
             }
         }
         // Array typechecking also consists of validating the number of constant elements.
- Constant::Array(id, ref elems) => { - if let Type::Array(_, dc_ids) = &types[id.idx()] { - let mut total_num_elems = 1; - for dc_id in dc_ids.iter() { - total_num_elems *= if let DynamicConstant::Constant(extent) = - dynamic_constants[dc_id.idx()] - { - extent - } else { - return Error(String::from("Array constant type must reference only constant valued dynamic constants.")); - }; - } - if total_num_elems != 1 && total_num_elems != elems.len() { - return Error(String::from("Array constant must have a compatible amount of elements as the extent of the array.")); - } + Constant::Array(id) => { + if let Type::Array(_, _) = &types[id.idx()] { Concrete(id) } else { Error(String::from( @@ -579,8 +566,6 @@ fn typeflow( )) } } - // Zero constants need to store their type, and we trust it. - Constant::Zero(id) => Concrete(id), } } Node::DynamicConstant { id } => { diff --git a/hercules_opt/src/ccp.rs b/hercules_opt/src/ccp.rs index 402e24757a014e5ace86ec6414f32d9fc7eacf23..a3506948ee32db45e30d50bae10d4a257cf8b2bc 100644 --- a/hercules_opt/src/ccp.rs +++ b/hercules_opt/src/ccp.rs @@ -456,7 +456,6 @@ fn ccp_flow_function( (UnaryOperator::Neg, Constant::Integer64(val)) => ConstantLattice::Constant(Constant::Integer64(-val)), (UnaryOperator::Neg, Constant::Float32(val)) => ConstantLattice::Constant(Constant::Float32(-val)), (UnaryOperator::Neg, Constant::Float64(val)) => ConstantLattice::Constant(Constant::Float64(-val)), - (UnaryOperator::Neg, Constant::Zero(id)) => ConstantLattice::Constant(Constant::Zero(*id)), (UnaryOperator::Cast(_), _) => ConstantLattice::Bottom, _ => panic!("Unsupported combination of unary operation and constant value. 
Did typechecking succeed?") } @@ -485,32 +484,6 @@ fn ccp_flow_function( ConstantLattice::Constant(right_cons), ) = (left_constant, right_constant) { - let type_to_zero_cons = |ty_id: TypeID| { - match types[ty_id.idx()] { - Type::Boolean => Constant::Boolean(false), - Type::Integer8 => Constant::Integer8(0), - Type::Integer16 => Constant::Integer16(0), - Type::Integer32 => Constant::Integer32(0), - Type::Integer64 => Constant::Integer64(0), - Type::UnsignedInteger8 => Constant::UnsignedInteger8(0), - Type::UnsignedInteger16 => Constant::UnsignedInteger16(0), - Type::UnsignedInteger32 => Constant::UnsignedInteger32(0), - Type::UnsignedInteger64 => Constant::UnsignedInteger64(0), - Type::Float32 => Constant::Float32(ordered_float::OrderedFloat::<f32>(0.0)), - Type::Float64 => Constant::Float64(ordered_float::OrderedFloat::<f64>(0.0)), - _ => panic!("Unsupported combination of binary operation and constant values. Did typechecking succeed?") - } - }; - let left_cons = if let Constant::Zero(id) = left_cons { - type_to_zero_cons(*id) - } else { - left_cons.clone() - }; - let right_cons = if let Constant::Zero(id) = right_cons { - type_to_zero_cons(*id) - } else { - right_cons.clone() - }; let new_cons = match (op, left_cons, right_cons) { (BinaryOperator::Add, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val + right_val), (BinaryOperator::Add, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val + right_val), @@ -520,8 +493,8 @@ fn ccp_flow_function( (BinaryOperator::Add, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val + right_val), (BinaryOperator::Add, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val + right_val), (BinaryOperator::Add, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => 
Constant::UnsignedInteger64(left_val + right_val), - (BinaryOperator::Add, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val + right_val), - (BinaryOperator::Add, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val + right_val), + (BinaryOperator::Add, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val + *right_val), + (BinaryOperator::Add, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val + *right_val), (BinaryOperator::Sub, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val - right_val), (BinaryOperator::Sub, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val - right_val), (BinaryOperator::Sub, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val - right_val), @@ -530,8 +503,8 @@ fn ccp_flow_function( (BinaryOperator::Sub, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val - right_val), (BinaryOperator::Sub, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val - right_val), (BinaryOperator::Sub, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val - right_val), - (BinaryOperator::Sub, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val - right_val), - (BinaryOperator::Sub, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val - right_val), + (BinaryOperator::Sub, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val - *right_val), + (BinaryOperator::Sub, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val - *right_val), (BinaryOperator::Mul, 
Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val * right_val), (BinaryOperator::Mul, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val * right_val), (BinaryOperator::Mul, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val * right_val), @@ -540,8 +513,8 @@ fn ccp_flow_function( (BinaryOperator::Mul, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val * right_val), (BinaryOperator::Mul, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val * right_val), (BinaryOperator::Mul, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val * right_val), - (BinaryOperator::Mul, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val * right_val), - (BinaryOperator::Mul, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val * right_val), + (BinaryOperator::Mul, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val * *right_val), + (BinaryOperator::Mul, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val * *right_val), (BinaryOperator::Div, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val / right_val), (BinaryOperator::Div, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val / right_val), (BinaryOperator::Div, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val / right_val), @@ -550,8 +523,8 @@ fn ccp_flow_function( (BinaryOperator::Div, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val / right_val), (BinaryOperator::Div, 
Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val / right_val), (BinaryOperator::Div, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val / right_val), - (BinaryOperator::Div, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val / right_val), - (BinaryOperator::Div, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val / right_val), + (BinaryOperator::Div, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val / *right_val), + (BinaryOperator::Div, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val / *right_val), (BinaryOperator::Rem, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val % right_val), (BinaryOperator::Rem, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val % right_val), (BinaryOperator::Rem, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val % right_val), @@ -560,8 +533,8 @@ fn ccp_flow_function( (BinaryOperator::Rem, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val % right_val), (BinaryOperator::Rem, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val % right_val), (BinaryOperator::Rem, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val % right_val), - (BinaryOperator::Rem, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val % right_val), - (BinaryOperator::Rem, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val % right_val), + (BinaryOperator::Rem, 
Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val % *right_val), + (BinaryOperator::Rem, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val % *right_val), (BinaryOperator::LT, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Boolean(left_val < right_val), (BinaryOperator::LT, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Boolean(left_val < right_val), (BinaryOperator::LT, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Boolean(left_val < right_val), @@ -606,7 +579,7 @@ fn ccp_flow_function( // need to unpack the constants. (BinaryOperator::EQ, left_val, right_val) => Constant::Boolean(left_val == right_val), (BinaryOperator::NE, left_val, right_val) => Constant::Boolean(left_val != right_val), - (BinaryOperator::Or, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(left_val || right_val), + (BinaryOperator::Or, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(*left_val || *right_val), (BinaryOperator::Or, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val | right_val), (BinaryOperator::Or, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val | right_val), (BinaryOperator::Or, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val | right_val), @@ -615,7 +588,7 @@ fn ccp_flow_function( (BinaryOperator::Or, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val | right_val), (BinaryOperator::Or, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val | right_val), (BinaryOperator::Or, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val | 
right_val), - (BinaryOperator::And, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(left_val && right_val), + (BinaryOperator::And, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(*left_val && *right_val), (BinaryOperator::And, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val & right_val), (BinaryOperator::And, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val & right_val), (BinaryOperator::And, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val & right_val), diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index 58342c5e6107df626cca8f5f240f4d7e76808c05..641b46ab5dcd957f1f9d330e8044a120c5b72b83 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -1,18 +1,13 @@ -// extern crate hercules_cg; extern crate hercules_ir; extern crate postcard; extern crate serde; extern crate take_mut; use std::collections::HashMap; -use std::fs::File; -use std::io::prelude::*; use std::iter::zip; -use std::process::*; use self::serde::Deserialize; -// use self::hercules_cg::*; use self::hercules_ir::*; use crate::*; @@ -61,6 +56,7 @@ pub struct PassManager { pub loops: Option<Vec<LoopTree>>, pub antideps: Option<Vec<Vec<(NodeID, NodeID)>>>, pub bbs: Option<Vec<Vec<NodeID>>>, + pub fork_join_placements: Option<Vec<Vec<ForkJoinPlacement>>>, // Current plan. Keep track of the last time the plan was updated. 
pub plans: Option<Vec<Plan>>, @@ -83,6 +79,7 @@ impl PassManager { antideps: None, bbs: None, plans: None, + fork_join_placements: None, } } @@ -254,6 +251,27 @@ impl PassManager { } } + pub fn make_fork_join_placements(&mut self) { + if self.fork_join_placements.is_none() { + self.make_fork_join_maps(); + self.make_fork_join_nests(); + self.make_bbs(); + let fork_join_maps = self.fork_join_maps.as_ref().unwrap().iter(); + let fork_join_nests = self.fork_join_nests.as_ref().unwrap().iter(); + let bbs = self.bbs.as_ref().unwrap().iter(); + self.fork_join_placements = Some( + zip( + self.module.functions.iter(), + zip(fork_join_maps, zip(fork_join_nests, bbs)), + ) + .map(|(function, (fork_join_map, (fork_join_nest, bb)))| { + compute_fork_join_placement(function, fork_join_map, fork_join_nest, bb) + }) + .collect(), + ); + } + } + pub fn make_plans(&mut self) { if self.plans.is_none() { self.make_reverse_postorders(); @@ -412,6 +430,17 @@ impl PassManager { self.postdoms = Some(postdoms); self.fork_join_maps = Some(fork_join_maps); + // Verify the plan, if it exists. + if let Some(plans) = &self.plans { + for idx in 0..self.module.functions.len() { + plans[idx].verify_partitioning( + &self.module.functions[idx], + &self.def_uses.as_ref().unwrap()[idx], + &self.fork_join_maps.as_ref().unwrap()[idx], + ); + } + } + // Verify doesn't require clearing analysis results. continue; } @@ -420,21 +449,24 @@ impl PassManager { if *force_analyses { self.make_doms(); self.make_fork_join_maps(); + self.make_bbs(); self.make_plans(); + self.make_fork_join_placements(); } xdot_module( &self.module, self.reverse_postorders.as_ref().unwrap(), self.doms.as_ref(), self.fork_join_maps.as_ref(), + self.bbs.as_ref(), self.plans.as_ref(), + self.fork_join_placements.as_ref(), ); // Xdot doesn't require clearing analysis results. 
continue; } Pass::Codegen(output_file_name) => { - /* self.make_def_uses(); self.make_reverse_postorders(); self.make_typing(); @@ -444,50 +476,51 @@ impl PassManager { self.make_antideps(); self.make_bbs(); self.make_plans(); - - let mut llvm_ir = String::new(); - let manifest = codegen( - &self.module, - self.def_uses.as_ref().unwrap(), - self.reverse_postorders.as_ref().unwrap(), - self.typing.as_ref().unwrap(), - self.control_subgraphs.as_ref().unwrap(), - self.fork_join_maps.as_ref().unwrap(), - self.fork_join_nests.as_ref().unwrap(), - self.antideps.as_ref().unwrap(), - self.bbs.as_ref().unwrap(), - self.plans.as_ref().unwrap(), - &mut llvm_ir, - ) - .unwrap(); - - // Compile LLVM IR into ELF object. - let llc_process = Command::new("llc") - .arg("-filetype=obj") - .arg("-O3") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn() - .unwrap(); - llc_process - .stdin - .as_ref() - .unwrap() - .write(llvm_ir.as_bytes()) - .unwrap(); - let elf_object = llc_process.wait_with_output().unwrap().stdout; - - // Package manifest and ELF object into the same file. 
- let hbin_module = (manifest, elf_object); - let hbin_contents: Vec<u8> = postcard::to_allocvec(&hbin_module).unwrap(); - - let mut file = - File::create(output_file_name).expect("PANIC: Unable to open output file."); - file.write_all(&hbin_contents) - .expect("PANIC: Unable to write output file contents."); - - // Codegen doesn't require clearing analysis results.*/ - continue; + self.make_fork_join_placements(); + + //let smodule = simple_compile( + // &self.module, + // self.def_uses.as_ref().unwrap(), + // self.reverse_postorders.as_ref().unwrap(), + // self.typing.as_ref().unwrap(), + // self.control_subgraphs.as_ref().unwrap(), + // self.fork_join_maps.as_ref().unwrap(), + // self.fork_join_nests.as_ref().unwrap(), + // self.antideps.as_ref().unwrap(), + // self.bbs.as_ref().unwrap(), + // self.plans.as_ref().unwrap(), + // self.fork_join_placements.as_ref().unwrap(), + //); + //println!("{:#?}", smodule); + + //// Compile LLVM IR into ELF object. + //let llc_process = Command::new("llc") + // .arg("-filetype=obj") + // .arg("-O3") + // .stdin(Stdio::piped()) + // .stdout(Stdio::piped()) + // .spawn() + // .unwrap(); + //llc_process + // .stdin + // .as_ref() + // .unwrap() + // .write(llvm_ir.as_bytes()) + // .unwrap(); + //let elf_object = llc_process.wait_with_output().unwrap().stdout; + // + //// Package manifest and ELF object into the same file. + //let hbin_module = (manifest, elf_object); + //let hbin_contents: Vec<u8> = postcard::to_allocvec(&hbin_module).unwrap(); + // + //let mut file = + // File::create(output_file_name).expect("PANIC: Unable to open output file."); + //file.write_all(&hbin_contents) + // .expect("PANIC: Unable to write output file contents."); + // + + // Codegen doesn't require clearing analysis results. 
+ continue; } } diff --git a/hercules_opt/src/sroa.rs b/hercules_opt/src/sroa.rs index ef3eceab898147011f1ab932686afbbda6b4fc45..c48b85ae9d838b22700b55cca78550d6e6a034f2 100644 --- a/hercules_opt/src/sroa.rs +++ b/hercules_opt/src/sroa.rs @@ -181,7 +181,6 @@ pub fn sroa( } }) .collect(); - println!("{:?}", to_sroa); // Perform SROA. TODO: repair def-use when there are multiple product // constants to SROA away. @@ -189,14 +188,10 @@ pub fn sroa( for (constant_node_id, constant_id) in to_sroa { // Get the field constants to replace the product constant with. let product_constant = constants[constant_id.idx()].clone(); - let constant_fields = product_constant - .try_product_fields(types, constants) - .unwrap(); - println!("{:?}", constant_fields); + let constant_fields = product_constant.try_product_fields().unwrap(); // DFS to find all data nodes that use the product constant. let to_replace = sroa_dfs(constant_node_id, function, def_use); - println!("{:?}", to_replace); // Assemble a mapping from old nodes IDs acting on the product constant // to new nodes IDs operating on the field constants. 
diff --git a/hercules_samples/fork_join.hir b/hercules_samples/fork_join.hir index 99e95829578f5893ce507e9643a97727fb85ccc2..fe90da4d85bfcb89a51c2b3c39a30e9af7dd6139 100644 --- a/hercules_samples/fork_join.hir +++ b/hercules_samples/fork_join.hir @@ -1,8 +1,16 @@ fn fork_join<1>() -> u64 - f_ctrl = fork(start, #0) - j_ctrl = join(f_ctrl) + f_ctrl1 = fork(start, #0) + f_ctrl2 = fork(f_ctrl1, #0) + j_ctrl2 = join(f_ctrl2) + j_ctrl1 = join(j_ctrl2) zero = constant(u64, 0) - x = thread_id(f_ctrl) - data = reduce(j_ctrl, zero, sum) - sum = add(data, x) - r = return(j_ctrl, data) + x1 = thread_id(f_ctrl1) + x1_d = add(x1, x1) + x2 = thread_id(f_ctrl2) + x2_d = add(x2, x2) + data2 = reduce(j_ctrl2, zero, sum2) + sum2 = add(data2, x2_d) + extra = add(data2, x1_d) + data1 = reduce(j_ctrl1, zero, sum1) + sum1 = add(data1, extra) + r = return(j_ctrl1, data1) diff --git a/hercules_samples/matmul/matmul.hir b/hercules_samples/matmul/matmul.hir index 2f0fb67afff4c7707523bd6ae26c87b995e4109b..8c34a31664f2b73ebac9077e8605ac221a266e0a 100644 --- a/hercules_samples/matmul/matmul.hir +++ b/hercules_samples/matmul/matmul.hir @@ -1,5 +1,5 @@ fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) - c = constant(array(f32, #0, #2), zero) + c = constant(array(f32, #0, #2), []) i_ctrl = fork(start, #0) i_idx = thread_id(i_ctrl) j_ctrl = fork(i_ctrl, #2)