diff --git a/Cargo.toml b/Cargo.toml index 0b9262c841d1871c580677854625180e70a9309a..badc4260d3f2e1dae31d0d8e294c97aeee4b4fc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,4 +24,5 @@ members = [ "juno_samples/matmul", "juno_samples/casts_and_intrinsics", "juno_samples/nested_ccp", + #"juno_samples/implicit_clone", ] diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index 080c195a5680bc6c475b58ba4eac4ad5374c6773..fb7f653b9eb9dc9d39846210a4de5547b398c991 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -27,6 +27,7 @@ pub fn cpu_codegen<W: Write>( reverse_postorder: &Vec<NodeID>, typing: &Vec<TypeID>, control_subgraph: &Subgraph, + antideps: &Vec<(NodeID, NodeID)>, bbs: &Vec<NodeID>, w: &mut W, ) -> Result<(), Error> { @@ -38,6 +39,7 @@ pub fn cpu_codegen<W: Write>( reverse_postorder, typing, control_subgraph, + antideps, bbs, }; ctx.codegen_function(w) @@ -51,6 +53,7 @@ struct CPUContext<'a> { reverse_postorder: &'a Vec<NodeID>, typing: &'a Vec<TypeID>, control_subgraph: &'a Subgraph, + antideps: &'a Vec<(NodeID, NodeID)>, bbs: &'a Vec<NodeID>, } @@ -128,6 +131,7 @@ impl<'a> CPUContext<'a> { .filter(|id| !self.function.nodes[id.idx()].is_control()), ); let mut visited = bitvec![u8, Lsb0; 0; self.function.nodes.len()]; + let antideps = flip_antideps(&self.antideps); while let Some(id) = worklist.pop_front() { let node = &self.function.nodes[id.idx()]; if node.is_phi() @@ -135,7 +139,12 @@ impl<'a> CPUContext<'a> { || get_uses(node) .as_ref() .into_iter() - .all(|u| self.function.nodes[u.idx()].is_control() || visited[u.idx()]) + .chain(antideps.get(&id).into_iter().flatten()) + .all(|u| { + self.function.nodes[u.idx()].is_control() + || self.bbs[u.idx()] != self.bbs[id.idx()] + || visited[u.idx()] + }) { self.codegen_data_node(*id, &mut blocks)?; visited.set(id.idx(), true); diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index b5623d1a242f75687254ecd8b44a98d7d62fa7df..44aad61d72cdc2add0d46522da5d65f32dd58348 100644 --- 
a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -22,6 +22,7 @@ pub fn rt_codegen<W: Write>( reverse_postorder: &Vec<NodeID>, typing: &Vec<TypeID>, control_subgraph: &Subgraph, + antideps: &Vec<(NodeID, NodeID)>, bbs: &Vec<NodeID>, collection_objects: &CollectionObjects, callgraph: &CallGraph, @@ -34,6 +35,7 @@ pub fn rt_codegen<W: Write>( reverse_postorder, typing, control_subgraph, + antideps, bbs, collection_objects, callgraph, @@ -48,6 +50,7 @@ struct RTContext<'a> { reverse_postorder: &'a Vec<NodeID>, typing: &'a Vec<TypeID>, control_subgraph: &'a Subgraph, + antideps: &'a Vec<(NodeID, NodeID)>, bbs: &'a Vec<NodeID>, collection_objects: &'a CollectionObjects, callgraph: &'a CallGraph, @@ -218,6 +221,7 @@ impl<'a> RTContext<'a> { .filter(|id| !func.nodes[id.idx()].is_control()), ); let mut visited = bitvec![u8, Lsb0; 0; func.nodes.len()]; + let antideps = flip_antideps(&self.antideps); while let Some(id) = worklist.pop_front() { let node = &func.nodes[id.idx()]; if node.is_phi() @@ -225,7 +229,12 @@ impl<'a> RTContext<'a> { || get_uses(node) .as_ref() .into_iter() - .all(|u| func.nodes[u.idx()].is_control() || visited[u.idx()]) + .chain(antideps.get(&id).into_iter().flatten()) + .all(|u| { + func.nodes[u.idx()].is_control() + || self.bbs[u.idx()] != self.bbs[id.idx()] + || visited[u.idx()] + }) { self.codegen_data_node(*id, &mut blocks)?; visited.set(id.idx(), true); diff --git a/hercules_ir/src/antideps.rs b/hercules_ir/src/antideps.rs index a9080fd6d49904d162392b77af5eb218f81ea2bd..73ea30302d7022de7c03c0d1b3581e029cdc9ace 100644 --- a/hercules_ir/src/antideps.rs +++ b/hercules_ir/src/antideps.rs @@ -1,50 +1,249 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::iter::zip; + use crate::*; +/* + * In addition to collections, we need to figure out which "generation" of a + * collection a node may take as input. 
+ */ +#[derive(PartialEq, Eq, Clone, Debug)] +struct GenerationLattice { + objs: BTreeSet<(CollectionObjectID, NodeID)>, +} + +impl Semilattice for GenerationLattice { + fn meet(a: &Self, b: &Self) -> Self { + GenerationLattice { + objs: a.objs.union(&b.objs).map(|x| *x).collect(), + } + } + + fn top() -> Self { + GenerationLattice { + objs: BTreeSet::new(), + } + } + + fn bottom() -> Self { + // Bottom is not representable for this lattice with our Semilattice + // interface, but we never need to construct it. + panic!() + } +} + /* * Function to assemble anti-dependence edges. Returns a list of pairs of nodes. - * The first item in the pair is the read node, and the second item is the write - * node. + * The first item in the pair is the reading node, and the second item is the + * mutating node. */ -pub fn antideps(function: &Function, def_use: &ImmutableDefUseMap) -> Vec<(NodeID, NodeID)> { - // Anti-dependence edges are between a write node and a read node, where - // each node uses the same array value. The read must be scheduled before - // the write to avoid incorrect compilation. - let mut antideps = vec![]; +pub fn antideps( + function: &Function, + reverse_postorder: &Vec<NodeID>, + objects: &FunctionCollectionObjects, +) -> Vec<(NodeID, NodeID)> { + // First, we analyze "generations" of collections as they are mutated. + // Originating nodes and mutating nodes start a new generation of a + // collection. Generations are not ordered due to loops, but are rather just + // node IDs of the originating or mutating node (parameter, constant, call, + // undef, write). Other nodes operating on collections mean reads / writes + // can operate on potentially different generations of multiple collections + // (phi, reduce, select). 
+ let lattice = forward_dataflow(function, reverse_postorder, |inputs, id| { + match function.nodes[id.idx()] { + Node::Phi { + control: _, + data: _, + } + | Node::Reduce { + control: _, + init: _, + reduct: _, + } + | Node::Ternary { + op: TernaryOperator::Select, + first: _, + second: _, + third: _, + } => inputs + .into_iter() + .fold(GenerationLattice::top(), |acc, input| { + GenerationLattice::meet(&acc, input) + }), + Node::Parameter { index: _ } | Node::Constant { id: _ } | Node::Undef { ty: _ } => { + let objs = objects.objects(id); + GenerationLattice { + objs: objs.into_iter().map(|obj| (*obj, id)).collect(), + } + } + Node::Call { + control: _, + function: _, + dynamic_constants: _, + ref args, + } => { + let mut objs = BTreeSet::new(); + let call_objs = objects.objects(id); + + // If this call node might originate an object, add that to the + // lattice output - its generation is this call node. + for obj in call_objs { + if objects.origin(*obj) == CollectionObjectOrigin::Call(id) { + assert!(objs.len() <= 1); + objs.insert((*obj, id)); + } + } - for id in (0..function.nodes.len()).map(NodeID::new) { - // Collect the reads and writes to / from this collection. - let users = def_use.get_users(id); - let reads = users.iter().filter(|user| { - if let Node::Read { + // For every argument... + for (arg, arg_gens) in zip(args, inputs.into_iter().skip(1)) { + // Look at its objects... + for arg_obj in objects.objects(*arg) { + // For each object that might be returned... + if call_objs.contains(&arg_obj) { + let mutable = objects.mutators(*arg_obj).contains(&id); + for (obj, gen) in arg_gens.objs.iter() { + // Add that object to the output lattice. + if obj == arg_obj && mutable { + // Set the generation to this node if the + // object might be mutated. + objs.insert((*obj, id)); + } else if obj == arg_obj { + // Otherwise, keep the old generation. 
+ objs.insert((*obj, *gen)); + } + } + } + } + } + GenerationLattice { objs } + } + Node::Read { + collect: _, + indices: _, + } => inputs[0].clone(), + Node::Write { + collect: _, + data: _, + indices: _, + } => { + // Writes update the generation to the write. + let objs = inputs[0].objs.iter().map(|(obj, _)| (*obj, id)).collect(); + GenerationLattice { objs } + } + _ => GenerationLattice::top(), + } + }); + + // Second, we generate anti-dependence edges from the dataflow analysis. + // There are three cases where an anti-dependence edge is generated: + // + // 1. A read node and a write node share an object and generation pair on + // their `collect` input. + // 2. A read node and a call node share an object and generation pair, where + // the pair is on the read's `collect` input and the pair is on any input + // of the call node AND the call node is a mutator of the object. + // 3. A call node and a write node share an object and generation pair, + // where the pair is on any input of the call node and the pair is on the + // write's `collect` input. 
+ let mut reads_writes_calls_mut_calls_per_pair: BTreeMap< + (CollectionObjectID, NodeID), + (Vec<NodeID>, Vec<NodeID>, Vec<NodeID>, Vec<NodeID>), + > = BTreeMap::new(); + for (idx, node) in function.nodes.iter().enumerate() { + let id = NodeID::new(idx); + match node { + Node::Read { collect, indices: _, - } = function.nodes[user.idx()] - { - collect == id - } else { - false - } - }); - let mut writes = users.iter().filter(|user| { - if let Node::Write { + } => { + for pair in lattice[collect.idx()].objs.iter() { + reads_writes_calls_mut_calls_per_pair + .entry(*pair) + .or_default() + .0 + .push(id); + } + } + Node::Write { collect, data: _, indices: _, - } = function.nodes[user.idx()] - { - collect == id - } else { - false + } => { + for pair in lattice[collect.idx()].objs.iter() { + reads_writes_calls_mut_calls_per_pair + .entry(*pair) + .or_default() + .1 + .push(id); + } + } + Node::Call { + control: _, + function: _, + dynamic_constants: _, + ref args, + } => { + for arg in args { + for pair in lattice[arg.idx()].objs.iter() { + if objects.mutators(pair.0).contains(&id) { + reads_writes_calls_mut_calls_per_pair + .entry(*pair) + .or_default() + .3 + .push(id); + } else { + reads_writes_calls_mut_calls_per_pair + .entry(*pair) + .or_default() + .2 + .push(id); + } + } + } } - }); + _ => {} + } + } - // If there are any writes, compute the anti dependence edges. - if let Some(write) = writes.next() { - for read in reads { + // Once we've matched reads / writes / calls by object and generation pair, + // the pair itself no longer matters. 
+ let mut antideps = vec![]; + for (_, (reads, writes, calls, mut_calls)) in reads_writes_calls_mut_calls_per_pair { + // Case 1: + for read in reads.iter() { + for write in writes.iter() { antideps.push((*read, *write)); } } + + // Case 2: + for read in reads.iter() { + for mut_call in mut_calls.iter() { + antideps.push((*read, *mut_call)); + } + } + + // Case 3: + for call in calls.iter().chain(mut_calls.iter()) { + for write in writes.iter() { + antideps.push((*call, *write)); + } + } } antideps } + +/* + * Utility to make a map from node to anti-dependency uses (map mutator -> + * reads). + */ +pub fn flip_antideps(antideps: &Vec<(NodeID, NodeID)>) -> BTreeMap<NodeID, Vec<NodeID>> { + let mut result: BTreeMap<NodeID, Vec<NodeID>> = BTreeMap::new(); + + for (read, mutator) in antideps { + result.entry(*mutator).or_default().push(*read); + } + + result +} diff --git a/hercules_ir/src/collections.rs b/hercules_ir/src/collections.rs index c3dafe7ab9515c9defc6733059f7191be67a2974..23d84b1b6629f0a477217d0657085d234bf6cfe1 100644 --- a/hercules_ir/src/collections.rs +++ b/hercules_ir/src/collections.rs @@ -1,9 +1,5 @@ -extern crate bitvec; - use std::collections::{BTreeMap, BTreeSet}; -use self::bitvec::prelude::*; - use crate::*; /* @@ -33,7 +29,7 @@ use crate::*; * - For each node in each function, which collection objects may be on the * output of the node? * - For each function, which collection objects may be mutated inside that - * function? + * function, and by what nodes? * - For each function, which collection objects may be returned? * - For each collection object, how was it originated? 
*/ @@ -50,7 +46,7 @@ define_id_type!(CollectionObjectID); #[derive(Debug, Clone)] pub struct FunctionCollectionObjects { objects_per_node: Vec<Vec<CollectionObjectID>>, - mutated: BitVec<u8, Lsb0>, + mutated: Vec<Vec<NodeID>>, returned: Vec<CollectionObjectID>, origins: Vec<CollectionObjectOrigin>, } @@ -58,7 +54,7 @@ pub struct FunctionCollectionObjects { pub type CollectionObjects = BTreeMap<FunctionID, FunctionCollectionObjects>; impl CollectionObjectOrigin { - fn try_parameter(&self) -> Option<usize> { + pub fn try_parameter(&self) -> Option<usize> { match self { CollectionObjectOrigin::Parameter(index) => Some(*index), _ => None, @@ -87,7 +83,11 @@ impl FunctionCollectionObjects { } pub fn is_mutated(&self, object: CollectionObjectID) -> bool { - self.mutated[object.idx()] + !self.mutators(object).is_empty() + } + + pub fn mutators(&self, object: CollectionObjectID) -> &Vec<NodeID> { + &self.mutated[object.idx()] } pub fn num_objects(&self) -> usize { @@ -309,13 +309,13 @@ pub fn collection_objects( let returned = returned.into_iter().collect(); // Determine which objects are potentially mutated. - let mut mutated = bitvec![u8, Lsb0; 0; origins.len()]; + let mut mutated = vec![vec![]; origins.len()]; for (idx, node) in func.nodes.iter().enumerate() { if node.is_write() { // Every object that the write itself corresponds to is mutable // in this function. for object in objects_per_node[idx].iter() { - mutated.set(object.idx(), true); + mutated[object.idx()].push(NodeID::new(idx)); } } else if let Some((_, callee, _, args)) = node.try_call() { let fco = &collection_objects[&callee]; @@ -328,7 +328,7 @@ pub fn collection_objects( // Then every object corresponding to the argument node // in this function is mutable. 
for object in objects_per_node[arg.idx()].iter() { - mutated.set(object.idx(), true); + mutated[object.idx()].push(NodeID::new(idx)); } } } diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs index c4cf21a989d335fcb44d9bc425ef2aef1befcede..4a4011b1f19c62d741f8d30189998039a1dd1b30 100644 --- a/hercules_opt/src/lib.rs +++ b/hercules_opt/src/lib.rs @@ -10,6 +10,7 @@ pub mod forkify; pub mod gvn; pub mod inline; pub mod interprocedural_sroa; +pub mod materialize_clones; pub mod outline; pub mod pass; pub mod phi_elim; @@ -29,6 +30,7 @@ pub use crate::forkify::*; pub use crate::gvn::*; pub use crate::inline::*; pub use crate::interprocedural_sroa::*; +pub use crate::materialize_clones::*; pub use crate::outline::*; pub use crate::pass::*; pub use crate::phi_elim::*; diff --git a/hercules_opt/src/materialize_clones.rs b/hercules_opt/src/materialize_clones.rs new file mode 100644 index 0000000000000000000000000000000000000000..79002d15b77c1e89af86d3fe22eab17028c05ef9 --- /dev/null +++ b/hercules_opt/src/materialize_clones.rs @@ -0,0 +1,17 @@ +extern crate hercules_ir; + +use self::hercules_ir::*; + +use crate::*; + +/* + * Top level function to materialize clones of collections. This transformation + * eliminates the possibility of multiple independent writes (including dynamic + * writes) to a single collection by introducing extra collection constants and + * inserting explicit clones. This allows us to make the simplifying assumption + * in the backend that collections have reference, rather than value, semantics. + * The pass calling this function is mandatory for correctness. 
+ */ +pub fn materialize_clones(editor: &mut FunctionEditor, objects: &FunctionCollectionObjects) { + todo!() +} diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index 932ddee9c256783afa14b8c83afabad9f20008c8..796f8c7945be4bc66e51a9956c26ebe79f486496 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -38,6 +38,7 @@ pub enum Pass { DeleteUncalled, ForkSplit, Unforkify, + MaterializeClones, InferSchedules, Verify, // Parameterized over whether analyses that aid visualization are necessary. @@ -239,13 +240,21 @@ impl PassManager { pub fn make_antideps(&mut self) { if self.antideps.is_none() { - self.make_def_uses(); + self.make_reverse_postorders(); + self.make_collection_objects(); self.antideps = Some( zip( - self.def_uses.as_ref().unwrap().iter(), self.module.functions.iter(), + zip( + self.reverse_postorders.as_ref().unwrap().iter(), + self.collection_objects.as_ref().unwrap().iter(), + ), ) - .map(|(def_use, function)| antideps(function, def_use)) + // Fine since collection_objects is a BTreeMap - iteration order + // is fixed. 
+ .map(|(function, (reverse_postorder, objects))| { + antideps(function, reverse_postorder, objects.1) + }) .collect(), ); } @@ -790,6 +799,34 @@ impl PassManager { } self.clear_analyses(); } + Pass::MaterializeClones => { + self.make_def_uses(); + self.make_collection_objects(); + let def_uses = self.def_uses.as_ref().unwrap(); + let collection_objects = self.collection_objects.as_ref().unwrap(); + for idx in 0..self.module.functions.len() { + let constants_ref = + RefCell::new(std::mem::take(&mut self.module.constants)); + let dynamic_constants_ref = + RefCell::new(std::mem::take(&mut self.module.dynamic_constants)); + let types_ref = RefCell::new(std::mem::take(&mut self.module.types)); + let mut editor = FunctionEditor::new( + &mut self.module.functions[idx], + &constants_ref, + &dynamic_constants_ref, + &types_ref, + &def_uses[idx], + ); + materialize_clones(&mut editor, &collection_objects[&FunctionID::new(idx)]); + + self.module.constants = constants_ref.take(); + self.module.dynamic_constants = dynamic_constants_ref.take(); + self.module.types = types_ref.take(); + + self.module.functions[idx].delete_gravestones(); + } + self.clear_analyses(); + } Pass::InferSchedules => { self.make_def_uses(); self.make_fork_join_maps(); @@ -862,12 +899,14 @@ impl PassManager { self.make_reverse_postorders(); self.make_typing(); self.make_control_subgraphs(); + self.make_antideps(); self.make_bbs(); self.make_collection_objects(); self.make_callgraph(); let reverse_postorders = self.reverse_postorders.as_ref().unwrap(); let typing = self.typing.as_ref().unwrap(); let control_subgraphs = self.control_subgraphs.as_ref().unwrap(); + let antideps = self.antideps.as_ref().unwrap(); let bbs = self.bbs.as_ref().unwrap(); let collection_objects = self.collection_objects.as_ref().unwrap(); let callgraph = self.callgraph.as_ref().unwrap(); @@ -886,6 +925,7 @@ impl PassManager { &reverse_postorders[idx], &typing[idx], &control_subgraphs[idx], + &antideps[idx], &bbs[idx], &mut 
llvm_ir, ) @@ -896,6 +936,7 @@ impl PassManager { &reverse_postorders[idx], &typing[idx], &control_subgraphs[idx], + &antideps[idx], &bbs[idx], &collection_objects, &callgraph, @@ -964,8 +1005,11 @@ impl PassManager { self.fork_join_maps = None; self.fork_join_nests = None; self.loops = None; + self.reduce_cycles = None; self.antideps = None; + self.data_nodes_in_fork_joins = None; self.bbs = None; + self.collection_objects = None; self.callgraph = None; } diff --git a/juno_samples/implicit_clone/Cargo.toml b/juno_samples/implicit_clone/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..928fa1f2718b9637da2ac3fe740a0d893345d576 --- /dev/null +++ b/juno_samples/implicit_clone/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "juno_implicit_clone" +version = "0.1.0" +authors = ["Russel Arbore <rarbore2@illinois.edu>"] +edition = "2021" + +[[bin]] +name = "juno_implicit_clone" +path = "src/main.rs" + +[build-dependencies] +juno_build = { path = "../../juno_build" } + +[dependencies] +juno_build = { path = "../../juno_build" } +with_builtin_macros = "0.1.0" +async-std = "*" diff --git a/juno_samples/implicit_clone/build.rs b/juno_samples/implicit_clone/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..3378966b64a71a239178edc6a010f9081939f58a --- /dev/null +++ b/juno_samples/implicit_clone/build.rs @@ -0,0 +1,10 @@ +extern crate juno_build; +use juno_build::JunoCompiler; + +fn main() { + JunoCompiler::new() + .file_in_src("implicit_clone.jn") + .unwrap() + .build() + .unwrap(); +} diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn new file mode 100644 index 0000000000000000000000000000000000000000..6dfaff6c69cd7d553ba8075b9507d526c3268f82 --- /dev/null +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -0,0 +1,17 @@ +#[entry] +fn antideps(a : usize, b : usize) -> i32 { + let arr : i32[3]; + let r = arr[b]; + arr[a] = 5; + return r + arr[b]; +} + 
+#[entry] +fn implicit_clone(input : i32) -> i32 { + let arr : i32[3]; + arr[0] = 2; + let arr2 = arr; + arr2[1] = input; + arr[2] = 4; + return arr[0] + arr2[0] + arr[1] + arr2[1] + arr[2] + arr2[2]; +} diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..73edc4646972bd100c10edbc7c87c930c8003b70 --- /dev/null +++ b/juno_samples/implicit_clone/src/main.rs @@ -0,0 +1,23 @@ +#![feature(future_join, box_as_ptr)] + +extern crate async_std; +extern crate juno_build; + +juno_build::juno!("implicit_clone"); + +fn main() { + async_std::task::block_on(async { + let output = antideps(1, 1).await; + println!("{}", output); + assert_eq!(output, 5); + + let output = implicit_clone(3).await; + println!("{}", output); + assert_eq!(output, 11); // arr is [2, 0, 4] and its copy arr2 is [2, 3, 0]; the six reads sum to 11 + }); +} + +#[test] +fn implicit_clone_test() { + main(); +}