diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index f9046b434dc707224f8fbbddb6fec766eaf0b1b9..2c0f085df7f3057380a8a9364ad1acc70a71f640 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -77,6 +77,26 @@ impl DomTree { .1 } + /* + * Find the node with the shallowest level in the dom tree amongst the nodes + * given. + */ + pub fn shallowest_amongst<I>(&self, x: I) -> NodeID + where + I: Iterator<Item = NodeID>, + { + x.map(|x| { + if x == self.root { + (0, x) + } else { + (self.idom[&x].0, x) + } + }) + .min_by(|x, y| x.0.cmp(&y.0)) + .unwrap() + .1 + } + /* * Find the least common ancestor in the tree of two nodes. This is an * ancestor of the two nodes that is as far down the tree as possible. diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 11d23e618c7b872e43ef04e6c02e1b18348e473c..8bb8e4ef219f440c598ef3e07aeb7bf81dc178ef 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -1296,6 +1296,19 @@ impl Node { } } + pub fn try_write(&self) -> Option<(NodeID, NodeID, &[Index])> { + if let Node::Write { + collect, + data, + indices, + } = self + { + Some((*collect, *data, indices)) + } else { + None + } + } + pub fn is_zero_constant(&self, constants: &Vec<Constant>) -> bool { if let Node::Constant { id } = self && constants[id.idx()].is_zero() diff --git a/hercules_opt/src/dce.rs b/hercules_opt/src/dce.rs index 75268694fdccb23920cccf3cd60cb5ae6738ef0f..839e9ad1e5c586c56c9c0d697b3267e6dc6a5abe 100644 --- a/hercules_opt/src/dce.rs +++ b/hercules_opt/src/dce.rs @@ -6,8 +6,7 @@ use self::hercules_ir::ir::*; use crate::*; /* - * Top level function to run dead code elimination. Deletes nodes by setting - * nodes to gravestones. Works with a function already containing gravestones. + * Top level function to run dead code elimination. */ pub fn dce(editor: &mut FunctionEditor) { // Create worklist (starts as all nodes). 
diff --git a/hercules_opt/src/gvn.rs b/hercules_opt/src/gvn.rs index a9db0cd879a0796f0ae1545c7bb1048d012b0d39..5f7138e5ee691f6288248d6f18866efd0116dc42 100644 --- a/hercules_opt/src/gvn.rs +++ b/hercules_opt/src/gvn.rs @@ -11,7 +11,7 @@ use crate::*; * fairly simple compared to in a normal CFG. Needs access to constants for * identity function simplification. */ -pub fn gvn(editor: &mut FunctionEditor) { +pub fn gvn(editor: &mut FunctionEditor, gvn_constants_and_clones: bool) { // Create worklist (starts as all nodes) and value number hashmap. let mut worklist: Vec<NodeID> = (0..editor.func().nodes.len()).map(NodeID::new).collect(); let mut value_numbers: HashMap<Node, NodeID> = HashMap::new(); @@ -28,7 +28,15 @@ pub fn gvn(editor: &mut FunctionEditor) { // Next, check if there is a value number for this simplified value yet. if let Some(number) = value_numbers.get(&editor.func().nodes[value.idx()]) { // If the number is this worklist item, there's nothing to be done. - if *number == work { + // Also, don't GVN constants and clones if indicated to not do so. 
+ if *number == work + || (!gvn_constants_and_clones + && (editor.func().nodes[work.idx()].is_constant() + || editor.func().nodes[work.idx()] + .try_write() + .map(|(_, _, indices)| indices.is_empty()) + .unwrap_or(false))) + { continue; } diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 254524f9eb75865a0d0c480dd5aa7fd71115e374..3c89c7da47682ec048b8d84fe67d3cae09727a4b 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -2,9 +2,8 @@ extern crate bitvec; extern crate hercules_cg; extern crate hercules_ir; -use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque}; use std::iter::{empty, once, zip, FromIterator}; -use std::mem::take; use self::bitvec::prelude::*; @@ -70,11 +69,11 @@ pub fn legalize_reference_semantics( ) { Ok(bbs) => bbs, Err((obj, reader)) => { - induce_clone(editor, typing, obj, reader); + todo!(); return None; } }; - if materialize_clones(editor, typing, control_subgraph, objects, &bbs) { + if materialize_clones(editor, typing, control_subgraph, dom, loops, objects, &bbs) { None } else { Some(bbs) @@ -223,6 +222,7 @@ fn basic_blocks( || root_block_iterated_users.contains(&mutator)) && mutating_objects(function, func_id, *mutator, objects) .any(|mutated| read_objs.contains(&mutated)) + && id != mutator { antideps.insert((*id, *mutator)); } @@ -534,6 +534,8 @@ fn materialize_clones( editor: &mut FunctionEditor, typing: &Vec<TypeID>, control_subgraph: &Subgraph, + dom: &DomTree, + loops: &LoopTree, objects: &CollectionObjects, bbs: &BasicBlocks, ) -> bool { @@ -543,7 +545,8 @@ fn materialize_clones( // This is the only place this form is used, so just hardcode it here. // // This forward dataflow analysis tracks which collections are used at each - // program point. Collections are referred to using node IDs. 
Specifically: + // program point, and by what user nodes. Collections are referred to using + // node IDs. Specifically: // // - Phi - a phi node adds its inputs to the used set and removes itself // from the used set. If a phi uses an ID that is used along the edge of @@ -567,11 +570,11 @@ fn materialize_clones( // to the used set and removes itself from the used set. If any mutated // input is already used, a clone is induced. // - // Reads of sub-collections (select, read, and call nodes) use a collection - // because they may have downstream writes that depend on the new "view" of - // the same collection. This does not include reads that "end" (the `data` - // input of a write). This analysis does not consider parallel mutations in - // fork-joins, which are handled separately later in this function. + // Reads of sub-collections (select, some read, and some call nodes) use a + // collection because they may have downstream writes that depend on the new + // "sub-view" of the same collection. This does not include reads that "end" + // (most reads, some calls, the `data` input of a write). This analysis does + // not consider parallel mutations in fork-joins. let rev_po = control_subgraph.rev_po(NodeID::new(0)); let mut total_num_pts = 0; let mut bb_to_prefix_sum = vec![0; bbs.0.len()]; @@ -581,82 +584,85 @@ fn materialize_clones( total_num_pts += insts.len() + 1; } } - // Lattice maps each program point to a set of used values. Top is that no - // nodes are used yet. + // Lattice maps each program point to a set of used values and their + // possible users. Top is that no nodes are used yet. 
let nodes = &editor.func().nodes; let func_id = editor.func_id(); - let mut lattice: Vec<BTreeSet<NodeID>> = vec![BTreeSet::new(); total_num_pts]; + let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, + right: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + for (used, users) in right.into_iter() { + left.entry(*used).or_default().extend(users.into_iter()); + } + }; + let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts]; loop { let mut changed = false; for bb in rev_po.iter() { // The lattice value of the first point is the meet of the // predecessor terminating lattice values. - let mut top_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()]]); + let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]]; + let mut new_top_value = BTreeMap::new(); // Clearing `top_value` is not necessary since used nodes are never // removed from lattice values, only added. for pred in control_subgraph.preds(*bb) { - // It should not be possible in Hercules IR for a basic block to - // be one of its own predecessors. - assert_ne!(*bb, pred); let last_pt = bbs.1[pred.idx()].len(); - for elem in lattice[bb_to_prefix_sum[pred.idx()] + last_pt].iter() { - changed |= top_value.insert(*elem); - } + meet( + &mut new_top_value, + &lattice[bb_to_prefix_sum[pred.idx()] + last_pt], + ); } - lattice[bb_to_prefix_sum[bb.idx()]] = top_value; + changed |= *old_top_value != new_top_value; + lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value; // The lattice value of following points are determined by their // immediate preceding instructions. 
let insts = &bbs.1[bb.idx()]; for (prev_pt, inst) in insts.iter().enumerate() { - let mut new_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]); + let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]; let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; + let mut new_value = prev_value.clone(); match nodes[inst.idx()] { Node::Phi { control: _, ref data, } if !objects[&func_id].objects(*inst).is_empty() => { for elem in data { - changed |= new_value.insert(*elem); + new_value.entry(*elem).or_default().insert(*inst); } - changed |= new_value.remove(inst); + new_value.remove(inst); } Node::Ternary { op: TernaryOperator::Select, first: _, second, third, + } + | Node::Reduce { + control: _, + init: second, + reduct: third, } => { if !objects[&func_id].objects(*inst).is_empty() { - changed |= new_value.insert(second); - changed |= new_value.insert(third); - changed |= new_value.remove(inst); + new_value.entry(second).or_default().insert(*inst); + new_value.entry(third).or_default().insert(*inst); + new_value.remove(inst); } } - Node::Reduce { - control: _, - init, - reduct, - } if !objects[&func_id].objects(*inst).is_empty() => { - changed |= new_value.insert(init); - changed |= new_value.insert(reduct); - changed |= new_value.remove(inst); - } Node::Read { collect, indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { - changed |= new_value.insert(collect); - changed |= new_value.remove(inst); + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); } Node::Write { collect, data: _, indices: _, } => { - changed |= new_value.insert(collect); - changed |= new_value.remove(inst); + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); } Node::Call { control: _, @@ -674,23 +680,21 @@ fn materialize_clones( }) .unwrap_or(false) { - changed |= new_value.insert(*arg); + new_value.entry(*arg).or_default().insert(*inst); } } - changed |= new_value.remove(inst); - } - _ => { 
- for elem in prev_value { - changed |= new_value.insert(*elem); - } + new_value.remove(inst); } + _ => {} } + changed |= *old_value != new_value; lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value; } // Handle reduces in this block specially at the very end. let last_pt = insts.len(); - let mut bottom_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()] + last_pt]); + let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt]; + let mut new_bottom_value = old_bottom_value.clone(); for inst in insts.iter() { if let Node::Reduce { control: _, @@ -699,27 +703,241 @@ fn materialize_clones( } = nodes[inst.idx()] { assert!( - bottom_value.contains(&reduct), + new_bottom_value.contains_key(&reduct), "PANIC: Can't handle clones inside a reduction cycle currently." ); - changed |= bottom_value.remove(inst); + new_bottom_value.remove(inst); } } - lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = bottom_value; + changed |= *old_bottom_value != new_bottom_value; + lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value; } if !changed { break; } } + let mut super_value = BTreeMap::new(); + for value in lattice.iter() { + meet(&mut super_value, value); + } + + // Helper to induce a clone when an implicit clone is identified. + let nodes = nodes.clone(); + let mut induce_clone = |object: NodeID, + user: NodeID, + value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + // If `user` already used `object` and tries to use it again, then the + // clone is a "loop induced" clone. Otherwise, it's a simple clone. + if !value[&object].contains(&user) { + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the clone into the new constant collection. 
+ + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + assert!(success); + } else { + // Figure out where to place that phi. This is the deepest + // loop header where `user` is responsible for making `object` used + // at the top of the block, and the block dominates the block + // containing `user`. If `user` is a phi, then the region it's + // attached to is excluded from eligibility. + let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { + lattice[bb_to_prefix_sum[bb.idx()]] + .get(&object) + .unwrap_or(&BTreeSet::new()) + .contains(&user) + && dom.does_dom(*bb, bbs.0[user.idx()]) + && loops.contains(*bb) + && loops.is_in_loop(*bb, bbs.0[user.idx()]) + && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) + }); + let top_block = eligible_blocks + .max_by_key(|bb| loops.nesting(*bb).unwrap()) + .unwrap(); + assert!(editor.func().nodes[top_block.idx()].is_region()); + + // Figure out the users of `object` that we need to phi back + // upwards. Assign each user a number indicating how far down the + // user chain it is, higher is farther down. This is used for + // picking the most downstream user later.
+ let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); + let mut workset: BTreeSet<NodeID> = BTreeSet::new(); + workset.insert(object); + let mut chain_ordering = 1; + while let Some(pop) = workset.pop_first() { + let iterated_users: BTreeSet<_> = super_value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten() + .map(|id| *id) + .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) + .collect(); + workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); + for user in iterated_users { + *users.entry(user).or_default() = chain_ordering; + chain_ordering += 1; + } + } + + // The fringe users may not dominate any predecessors of the loop + // header. The following is some Juno code that exposes this: + // + // fn problematic(a : size) -> i32 { + // for i = 0 to a { + // let arr : i32[1]; + // for j = 0 to a { + // arr[0] = 1; + // } + // } + // return 0; + // } + // + // Note that `arr` induces a clone each iteration, since its value + // needs to be reset to all zeros. However, it should also be noted + // that the most fringe user of `arr`, the write inside the inner + // loop, does not dominate the bottom of the outer loop. Thus, we + // need to insert a phi in the bottom block of the outer loop to + // retrieve either the write, or `arr` before the inner loop. The + // general version of this problem requires the following solution. + // Our goal is to figure out which downstream user represents + // `object` at each block in the loop. We first assign each block + // containing a user the most downstream user it contains. Then, we + // create a dummy phi for every region (including the header) in the + // loop, which is the downstream user for that block. Then, every + // other block is assigned the downstream user of its single + // predecessor. This basically amounts to recreating SSA for + // `object` inside the loop. 
+ let mut user_per_loop_bb = BTreeMap::new(); + let mut added_phis = BTreeMap::new(); + let mut top_phi = NodeID::new(0); + // Assign existing users. + for (user, ordering) in users.iter() { + let bb = bbs.0[user.idx()]; + if let Some(old_user) = user_per_loop_bb.get(&bb) + && users[old_user] > *ordering + { + } else { + user_per_loop_bb.insert(bb, *user); + } + } + // Assign dummy phis. + for bb in loops.nodes_in_loop(top_block) { + if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) + && editor.func().nodes[bb.idx()].is_region() + { + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: empty().collect(), + }); + if bb != top_block || !user_per_loop_bb.contains_key(&bb) { + user_per_loop_bb.insert(bb, phi_node); + } + if bb == top_block { + top_phi = phi_node; + } + added_phis.insert(phi_node, bb); + Ok(edit) + }); + assert!(success); + } + } + // Assign users for the rest of the blocks. + for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { + if !user_per_loop_bb.contains_key(&bb) { + assert!(control_subgraph.preds(*bb).count() == 1); + user_per_loop_bb.insert( + *bb, + user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], + ); + } + } + + // Induce the clone. + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the phis. + let mut phi_map = BTreeMap::new(); + let mut real_phis = BTreeSet::new(); + for (dummy, bb) in added_phis { + let real = edit.add_node(Node::Phi { + control: bb, + data: control_subgraph + .preds(bb) + .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) + .collect(), + }); + phi_map.insert(dummy, real); + real_phis.insert(real); + } + + // Create the clone into the phi. 
+ let real_top_phi = phi_map[&top_phi]; + let clone_node = edit.add_node(Node::Write { + collect: real_top_phi, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make users use the cloned object. + edit = edit.replace_all_uses_where(object, clone_node, |id| { + id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) + })?; + + // Get rid of the dummy phis. + for (dummy, real) in phi_map { + edit = edit.replace_all_uses(dummy, real)?; + edit = edit.delete_node(dummy)?; + } + + // Make phis use the clone instead of the top phi. + edit = + edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?; + + Ok(edit) + }); + assert!(success); + + // De-duplicate phis. + gvn(editor, false); + + // Get rid of unused phis. + dce(editor); + + // Simplify phis. + phi_elim(editor); + } + }; // Now that we've computed the used collections dataflow analysis, use the // results to materialize a clone whenever a node attempts to use an already - // used node. - let mut any_induced = false; - let nodes = nodes.clone(); - for bb in rev_po.iter() { + // used node. As soon as any clone is found, return since that clone needs + // to get placed before other clones can be discovered. Traverse blocks in + // postorder so that clones inside loops are discovered before loop-induced + // clones. + for bb in rev_po.iter().rev() { let insts = &bbs.1[bb.idx()]; + // Accumulate predecessor bottom used sets for phis. Phis are special in + // that they need to be path sensitive, but multiple phis in a single + // block may use a single collection, and that needs to induce a clone. + let mut phi_acc_bottoms = BTreeMap::new(); for (prev_pt, inst) in insts.iter().enumerate() { let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; match nodes[inst.idx()] { @@ -731,11 +949,18 @@ fn materialize_clones( // predecessor's bottom lattice value (phis need to be path- // sensitive). 
for (pred, arg) in zip(control_subgraph.preds(*bb), data) { - let last_pt = bbs.1[pred.idx()].len(); - let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; - if bottom.contains(arg) { - induce_clone(editor, typing, *arg, *inst); - any_induced = true; + let bottom = phi_acc_bottoms.entry(pred).or_insert_with(|| { + let last_pt = bbs.1[pred.idx()].len(); + let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; + bottom.clone() + }); + if bottom.contains_key(&arg) { + induce_clone(*arg, *inst, bottom); + return true; + } else { + // Subsequent phis using `arg` along the same + // predecessor induce a clone. + bottom.insert(*arg, once(*inst).collect()); } } } @@ -745,13 +970,13 @@ fn materialize_clones( second, third, } => { - if value.contains(&second) { - induce_clone(editor, typing, second, *inst); - any_induced = true; + if value.contains_key(&second) { + induce_clone(second, *inst, &value); + return true; } - if value.contains(&third) { - induce_clone(editor, typing, third, *inst); - any_induced = true; + if value.contains_key(&third) { + induce_clone(third, *inst, &value); + return true; } } Node::Reduce { @@ -759,18 +984,18 @@ fn materialize_clones( init, reduct: _, } => { - if value.contains(&init) { - induce_clone(editor, typing, init, *inst); - any_induced = true; + if value.contains_key(&init) { + induce_clone(init, *inst, &value); + return true; } } Node::Read { collect, indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { - if value.contains(&collect) { - induce_clone(editor, typing, collect, *inst); - any_induced = true; + if value.contains_key(&collect) { + induce_clone(collect, *inst, &value); + return true; } } Node::Write { @@ -778,9 +1003,9 @@ fn materialize_clones( data: _, indices: _, } => { - if value.contains(&collect) { - induce_clone(editor, typing, collect, *inst); - any_induced = true; + if value.contains_key(&collect) { + induce_clone(collect, *inst, &value); + return true; } } Node::Call { @@ -798,10 +1023,10 
@@ fn materialize_clones( || callee_objects.returned_objects().contains(&object) }) .unwrap_or(false) - && value.contains(arg) + && value.contains_key(arg) { - induce_clone(editor, typing, *arg, *inst); - any_induced = true; + induce_clone(*arg, *inst, value); + return true; } } } @@ -809,27 +1034,5 @@ fn materialize_clones( } } } - any_induced -} - -/* - * Utility to insert a clone before a use of a collection. - */ -fn induce_clone(editor: &mut FunctionEditor, typing: &Vec<TypeID>, object: NodeID, user: NodeID) { - editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); - - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), - }); - - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); + false } diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index d0449a4a57b2325fcb7c2a766e1d17bff2587ea5..baeaae87ddece17c761d84bd75999b57de7fc96a 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -395,7 +395,7 @@ impl PassManager { &types_ref, &def_uses[idx], ); - gvn(&mut editor); + gvn(&mut editor, false); self.module.constants = constants_ref.take(); self.module.dynamic_constants = dynamic_constants_ref.take(); @@ -828,7 +828,11 @@ impl PassManager { &types_ref, &def_uses[idx], ); - infer_parallel_reduce(&mut editor, &fork_join_maps[idx], &reduce_cycles[idx]); + infer_parallel_reduce( + &mut editor, + &fork_join_maps[idx], + &reduce_cycles[idx], + ); infer_parallel_fork(&mut editor, &fork_join_maps[idx]); infer_vectorizable(&mut editor, &fork_join_maps[idx]); infer_tight_associative(&mut editor, &reduce_cycles[idx]); diff --git a/juno_frontend/src/lib.rs 
b/juno_frontend/src/lib.rs index 0dd5cdd338db792a9725b22cc95527e5a08688c2..075c7a45adb112f9093eb1861d133aa922adbf52 100644 --- a/juno_frontend/src/lib.rs +++ b/juno_frontend/src/lib.rs @@ -201,6 +201,8 @@ pub fn compile_ir( pm.add_pass(hercules_opt::pass::Pass::Xdot(true)); } + add_pass!(pm, verify, LegalizeReferenceSemantics); + add_verified_pass!(pm, verify, DCE); add_pass!(pm, verify, LegalizeReferenceSemantics); pm.add_pass(hercules_opt::pass::Pass::Codegen(output_dir, module_name)); pm.run_passes(); diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index a2d6cba0c275fdbe776964558de1ec800c1c2ab2..d06a64981db4e74f9441c3db5e0b6220d4729dc0 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -27,6 +27,57 @@ fn loop_implicit_clone(input : i32) -> i32 { return r + 7; } +#[entry] +fn double_loop_implicit_clone(a : usize) -> usize { + for i = 0 to a { + let arr : i32[1]; + for j = 0 to a { + arr[0] = 1; + } + } + return 42; +} + +#[entry] +fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 { + let x = 0; + for j = 0 to 2 { + for i = 0 to 5 { + let arr : i32[3]; + let arr2 : i32[1]; + if a == b { + arr[a] += 7; + } else { + arr[a] += 1; + } + for k = 0 to (a + b - 1) { + arr[a] += 2; + arr2[0] += 1; + } + x += arr[b]; + } + } + return x; +} + +#[entry] +fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 { + let x = 0; + for i = 0 to 3 { + let arr : i32[1]; + if a == b { + arr[0] = 6; + } else { + arr[0] = 9; + } + for j = 0 to 4 { + arr[0] += 1; + } + x += arr[0]; + } + return x; +} + #[entry] fn no_implicit_clone(input : i32) -> i32 { let arr : i32[2]; diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index 45c722d783494d7fd5a9b9e91d4ea4db90ce4f0c..c0adaaef28c84278b585e3ecf27014217e0ccaf7 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ 
b/juno_samples/implicit_clone/src/main.rs @@ -15,6 +15,18 @@ fn main() { println!("{}", output); assert_eq!(output, 7); + let output = double_loop_implicit_clone(3).await; + println!("{}", output); + assert_eq!(output, 42); + + let output = tricky_loop_implicit_clone(2, 2).await; + println!("{}", output); + assert_eq!(output, 130); + + let output = tricky2_loop_implicit_clone(2, 3).await; + println!("{}", output); + assert_eq!(output, 39); + let output = no_implicit_clone(4).await; println!("{}", output); assert_eq!(output, 13); diff --git a/juno_samples/matmul/src/matmul.jn b/juno_samples/matmul/src/matmul.jn index 775bb382a46a640cc45ddbf5d68b31c5c1d58bd3..f59cc21550ea10495deda644567a191608182faf 100644 --- a/juno_samples/matmul/src/matmul.jn +++ b/juno_samples/matmul/src/matmul.jn @@ -20,10 +20,8 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for bi = 0 to n / 64 { for bk = 0 to l / 64 { - // TODO: make these all the same size, clone analysis should undo GVN's - // combining of these three arrays. - let atile : i32[66, 64]; - let btile : i32[65, 64]; + let atile : i32[64, 64]; + let btile : i32[64, 64]; let ctile : i32[64, 64]; for tile_idx = 0 to m / 64 { @@ -31,11 +29,6 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for tk = 0 to 64 { atile[ti, tk] = a[bi * 64 + ti, tile_idx * 64 + tk]; btile[ti, tk] = b[tile_idx * 64 + ti, bk * 64 + tk]; - // TODO: remove setting ctile to zero explicitly, clone analysis - // should see a lack of a phi for ctile in the block loops and - // induce a copy of an initial value of ctile (all zeros) on each - // iteration of the block loops. - ctile[ti, tk] = 0; } } for ti = 0 to 64 {