diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs
index a2aedadf0fbc996bc0eb46feae77ee7a526de491..a4948016b57d5efaf0c64183c6e3c78141000bf8 100644
--- a/hercules_ir/src/subgraph.rs
+++ b/hercules_ir/src/subgraph.rs
@@ -23,6 +23,7 @@ pub struct Subgraph {
     original_num_nodes: u32,
 }
 
+#[derive(Debug, Clone)]
 pub struct SubgraphIterator<'a> {
     nodes: &'a Vec<NodeID>,
     edges: &'a [u32],
diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs
index 3c89c7da47682ec048b8d84fe67d3cae09727a4b..5db49ec467977076ca1d82f933e23020f18ce2f4 100644
--- a/hercules_opt/src/legalize_reference_semantics.rs
+++ b/hercules_opt/src/legalize_reference_semantics.rs
@@ -321,10 +321,11 @@ fn basic_blocks(
         // Look between the LCA and the schedule early location to place the
         // node.
         let schedule_early = schedule_early[id.idx()].unwrap();
+        let schedule_late = lca.unwrap_or(schedule_early);
         let mut chain = dom
             // If the node has no users, then it doesn't really matter where we
             // place it - just place it at the early placement.
-            .chain(lca.unwrap_or(schedule_early), schedule_early);
+            .chain(schedule_late, schedule_early);
 
         if let Some(mut location) = chain.next() {
             /*
@@ -539,10 +540,108 @@ fn materialize_clones(
     objects: &CollectionObjects,
     bbs: &BasicBlocks,
 ) -> bool {
-    // First, run dataflow analysis to figure out which access to collections
-    // induce clones. This dataflow analysis depends on basic block assignments
-    // and is more analogous to standard dataflow analysis in CFG + SSA IRs.
-    // This is the only place this form is used, so just hardcode it here.
+    let rev_po = control_subgraph.rev_po(NodeID::new(0));
+    let mut total_num_pts = 0;
+    let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
+    for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
+        if idx == bb.idx() {
+            bb_to_prefix_sum[idx] = total_num_pts;
+            total_num_pts += insts.len() + 1;
+        }
+    }
+
+    // Calculate two lattices - one that includes back edges, and one that
+    // doesn't. We want to handle simple clones before loop induced clones, so
+    // we first materialize clones based on the no-back-edges lattice, and then
+    // based on the full lattice.
+    let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
+        vec![BTreeMap::new(); total_num_pts];
+    used_collections_dataflow(
+        editor,
+        &mut no_back_edge_lattice,
+        &rev_po,
+        &bb_to_prefix_sum,
+        control_subgraph,
+        objects,
+        bbs,
+    );
+    let mut super_value = BTreeMap::new();
+    if find_clones(
+        editor,
+        &super_value,
+        &no_back_edge_lattice,
+        &rev_po,
+        &typing,
+        control_subgraph,
+        dom,
+        loops,
+        objects,
+        &bb_to_prefix_sum,
+        bbs,
+    ) {
+        return true;
+    }
+
+    // After inducing simple clones, calculate the full lattice and materialize
+    // any loop induced clones.
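+    // The full lattice includes the effects of back edges, so the dataflow
+    // below is iterated until it reaches a fixed point before any remaining
+    // loop induced clones are materialized.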
+ let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts]; + loop { + let changed = used_collections_dataflow( + editor, + &mut lattice, + &rev_po, + &bb_to_prefix_sum, + control_subgraph, + objects, + bbs, + ); + if !changed { + break; + } + } + for value in lattice.iter() { + meet(&mut super_value, value); + } + find_clones( + editor, + &super_value, + &lattice, + &rev_po, + &typing, + control_subgraph, + dom, + loops, + objects, + &bb_to_prefix_sum, + bbs, + ) +} + +fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) { + for (used, users) in right.into_iter() { + left.entry(*used).or_default().extend(users.into_iter()); + } +} + +/* + * Helper function to run a single iteration of the used collections dataflow + * analysis. Returns whether the lattice was changed. The lattice maps each + * program point to a set of used values and their possible users. Top is that + * no nodes are used yet. + */ +fn used_collections_dataflow( + editor: &FunctionEditor, + lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>, + rev_po: &Vec<NodeID>, + bb_to_prefix_sum: &Vec<usize>, + control_subgraph: &Subgraph, + objects: &CollectionObjects, + bbs: &BasicBlocks, +) -> bool { + // Run dataflow analysis to figure out which accesses to collections induce + // clones. This dataflow analysis depends on basic block assignments and is + // more analogous to standard dataflow analysis in CFG + SSA IRs. This is + // the only place this form is used, so just hardcode it here. // // This forward dataflow analysis tracks which collections are used at each // program point, and by what user nodes. Collections are referred to using @@ -575,363 +674,364 @@ fn materialize_clones( // "sub-view" of the same collection. This does not include reads that "end" // (most reads, some calls, the `data` input of a write). This analysis does // not consider parallel mutations in fork-joins. - let rev_po = control_subgraph.rev_po(NodeID::new(0)); - let mut total_num_pts = 0; - let mut bb_to_prefix_sum = vec![0; bbs.0.len()]; - for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) { - if idx == bb.idx() { - bb_to_prefix_sum[idx] = total_num_pts; - total_num_pts += insts.len() + 1; - } - } - // Lattice maps each program point to a set of used values and their - // possible users. Top is that no nodes are used yet. let nodes = &editor.func().nodes; let func_id = editor.func_id(); - let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, - right: &BTreeMap<NodeID, BTreeSet<NodeID>>| { - for (used, users) in right.into_iter() { - left.entry(*used).or_default().extend(users.into_iter()); - } - }; - let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts]; - loop { - let mut changed = false; + let mut changed = false; - for bb in rev_po.iter() { - // The lattice value of the first point is the meet of the - // predecessor terminating lattice values. - let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]]; - let mut new_top_value = BTreeMap::new(); - // Clearing `top_value` is not necessary since used nodes are never - // removed from lattice values, only added. 
- for pred in control_subgraph.preds(*bb) { - let last_pt = bbs.1[pred.idx()].len(); - meet( - &mut new_top_value, - &lattice[bb_to_prefix_sum[pred.idx()] + last_pt], - ); - } - changed |= *old_top_value != new_top_value; - lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value; + for bb in rev_po.iter() { + // The lattice value of the first point is the meet of the + // predecessor terminating lattice values. + let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]]; + let mut new_top_value = BTreeMap::new(); + // Clearing `top_value` is not necessary since used nodes are never + // removed from lattice values, only added. + for pred in control_subgraph.preds(*bb) { + let last_pt = bbs.1[pred.idx()].len(); + meet( + &mut new_top_value, + &lattice[bb_to_prefix_sum[pred.idx()] + last_pt], + ); + } + changed |= *old_top_value != new_top_value; + lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value; - // The lattice value of following points are determined by their - // immediate preceding instructions. - let insts = &bbs.1[bb.idx()]; - for (prev_pt, inst) in insts.iter().enumerate() { - let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]; - let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; - let mut new_value = prev_value.clone(); - match nodes[inst.idx()] { - Node::Phi { - control: _, - ref data, - } if !objects[&func_id].objects(*inst).is_empty() => { - for elem in data { - new_value.entry(*elem).or_default().insert(*inst); - } - new_value.remove(inst); - } - Node::Ternary { - op: TernaryOperator::Select, - first: _, - second, - third, - } - | Node::Reduce { - control: _, - init: second, - reduct: third, - } => { - if !objects[&func_id].objects(*inst).is_empty() { - new_value.entry(second).or_default().insert(*inst); - new_value.entry(third).or_default().insert(*inst); - new_value.remove(inst); - } - } - Node::Read { - collect, - indices: _, - } if !objects[&func_id].objects(*inst).is_empty() => { - new_value.entry(collect).or_default().insert(*inst); - new_value.remove(inst); - } - Node::Write { - collect, - data: _, - indices: _, - } => { - new_value.entry(collect).or_default().insert(*inst); - new_value.remove(inst); + // The lattice value of following points are determined by their + // immediate preceding instructions. 
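+        // Each case below adds the instruction as a user of the collections it
+        // uses and removes the entry for the instruction itself.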
+ let insts = &bbs.1[bb.idx()]; + for (prev_pt, inst) in insts.iter().enumerate() { + let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]; + let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; + let mut new_value = prev_value.clone(); + match nodes[inst.idx()] { + Node::Phi { + control: _, + ref data, + } if !objects[&func_id].objects(*inst).is_empty() => { + for elem in data { + new_value.entry(*elem).or_default().insert(*inst); } - Node::Call { - control: _, - function: callee, - dynamic_constants: _, - ref args, - } => { - let callee_objects = &objects[&callee]; - for (param_idx, arg) in args.into_iter().enumerate() { - if callee_objects - .param_to_object(param_idx) - .map(|object| { - callee_objects.is_mutated(object) - || callee_objects.returned_objects().contains(&object) - }) - .unwrap_or(false) - { - new_value.entry(*arg).or_default().insert(*inst); - } - } + new_value.remove(inst); + } + Node::Ternary { + op: TernaryOperator::Select, + first: _, + second, + third, + } + | Node::Reduce { + control: _, + init: second, + reduct: third, + } => { + if !objects[&func_id].objects(*inst).is_empty() { + new_value.entry(second).or_default().insert(*inst); + new_value.entry(third).or_default().insert(*inst); new_value.remove(inst); } - _ => {} } - changed |= *old_value != new_value; - lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value; - } - - // Handle reduces in this block specially at the very end. - let last_pt = insts.len(); - let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt]; - let mut new_bottom_value = old_bottom_value.clone(); - for inst in insts.iter() { - if let Node::Reduce { + Node::Read { + collect, + indices: _, + } if !objects[&func_id].objects(*inst).is_empty() => { + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); + } + Node::Write { + collect, + data: _, + indices: _, + } => { + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); + } + Node::Call { control: _, - init: _, - reduct, - } = nodes[inst.idx()] - { - assert!( - new_bottom_value.contains_key(&reduct), - "PANIC: Can't handle clones inside a reduction cycle currently." - ); - new_bottom_value.remove(inst); + function: callee, + dynamic_constants: _, + ref args, + } => { + let callee_objects = &objects[&callee]; + for (param_idx, arg) in args.into_iter().enumerate() { + if callee_objects + .param_to_object(param_idx) + .map(|object| { + callee_objects.is_mutated(object) + || callee_objects.returned_objects().contains(&object) + }) + .unwrap_or(false) + { + new_value.entry(*arg).or_default().insert(*inst); + } + } + new_value.remove(inst); } + _ => {} } - changed |= *old_bottom_value != new_bottom_value; - lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value; + changed |= *old_value != new_value; + lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value; } - if !changed { - break; + // Handle reduces in this block specially at the very end. + let last_pt = insts.len(); + let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt]; + let mut new_bottom_value = old_bottom_value.clone(); + for inst in insts.iter() { + if let Node::Reduce { + control: _, + init: _, + reduct, + } = nodes[inst.idx()] + { + assert!( + new_bottom_value.contains_key(&reduct), + "PANIC: Can't handle clones inside a reduction cycle currently." 
+ ); + new_bottom_value.remove(inst); + } } - } - let mut super_value = BTreeMap::new(); - for value in lattice.iter() { - meet(&mut super_value, value); + changed |= *old_bottom_value != new_bottom_value; + lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value; } - // Helper to induce a clone when an implicit clone is identified. - let nodes = nodes.clone(); - let mut induce_clone = |object: NodeID, - user: NodeID, - value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { - // If `user` already used `object` and tries to use it again, then the - // clone is a "loop induced" clone. Otherwise, it's a simple clone. - if !value[&object].contains(&user) { - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); + changed +} - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), - }); +/* + * Helper function to induce a clone once an object with multiple users has been + * found. + */ +fn induce_clone( + editor: &mut FunctionEditor, + object: NodeID, + user: NodeID, + value: &BTreeMap<NodeID, BTreeSet<NodeID>>, + super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>, + lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>, + rev_po: &Vec<NodeID>, + typing: &Vec<TypeID>, + control_subgraph: &Subgraph, + dom: &DomTree, + loops: &LoopTree, + bb_to_prefix_sum: &Vec<usize>, + bbs: &BasicBlocks, +) { + // If `user` already used `object` and tries to use it again, then the + // clone is a "loop induced" clone. Otherwise, it's a simple clone. + if !value[&object].contains(&user) { + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); - assert!(success); - } else { - // Figure out where to place that phi. This is the deepest - // loop header where `user` is responsible for making `object` used - // used at the top of the block, and the block dominates the block - // containing `user`. If `user` is a phi, then the region it's - // attached to is excluded from eligibility. - let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { - lattice[bb_to_prefix_sum[bb.idx()]] - .get(&object) - .unwrap_or(&BTreeSet::new()) - .contains(&user) - && dom.does_dom(*bb, bbs.0[user.idx()]) - && loops.contains(*bb) - && loops.is_in_loop(*bb, bbs.0[user.idx()]) - && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) + // Create the clone into the new constant collection. + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), }); - let top_block = eligible_blocks - .max_by_key(|bb| loops.nesting(*bb).unwrap()) - .unwrap(); - assert!(editor.func().nodes[top_block.idx()].is_region()); - // Figure out the users of `object` that we need to phi back - // upwards. Assign each user a number indicating how far down the - // user chain it is, higher is farther down. This is used for - // picking the most downstream user later. 
- let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); - let mut workset: BTreeSet<NodeID> = BTreeSet::new(); - workset.insert(object); - let mut chain_ordering = 1; - while let Some(pop) = workset.pop_first() { - let iterated_users: BTreeSet<_> = super_value - .get(&pop) - .map(|users| users.into_iter()) - .into_iter() - .flatten() - .map(|id| *id) - .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) - .collect(); - workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); - for user in iterated_users { - *users.entry(user).or_default() = chain_ordering; - chain_ordering += 1; - } + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + assert!(success); + } else { + // Figure out where to place that phi. This is the deepest + // loop header where `user` is responsible for making `object` used + // used at the top of the block, and the block dominates the block + // containing `user`. If `user` is a phi, then the region it's + // attached to is excluded from eligibility. + let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { + lattice[bb_to_prefix_sum[bb.idx()]] + .get(&object) + .unwrap_or(&BTreeSet::new()) + .contains(&user) + && dom.does_dom(*bb, bbs.0[user.idx()]) + && loops.contains(*bb) + && loops.is_in_loop(*bb, bbs.0[user.idx()]) + && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) + }); + let top_block = eligible_blocks + .max_by_key(|bb| loops.nesting(*bb).unwrap()) + .unwrap(); + assert!(editor.func().nodes[top_block.idx()].is_region()); + + // Figure out the users of `object` that we need to phi back + // upwards. Assign each user a number indicating how far down the + // user chain it is, higher is farther down. This is used for + // picking the most downstream user later. + let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); + let mut workset: BTreeSet<NodeID> = BTreeSet::new(); + workset.insert(object); + let mut chain_ordering = 1; + assert!(!super_value.is_empty()); + while let Some(pop) = workset.pop_first() { + let iterated_users: BTreeSet<_> = super_value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten() + .map(|id| *id) + .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) + .collect(); + workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); + for user in iterated_users { + *users.entry(user).or_default() = chain_ordering; + chain_ordering += 1; } + } - // The fringe users may not dominate any predecessors of the loop - // header. The following is some Juno code that exposes this: - // - // fn problematic(a : size) -> i32 { - // for i = 0 to a { - // let arr : i32[1]; - // for j = 0 to a { - // arr[0] = 1; - // } - // } - // return 0; - // } - // - // Note that `arr` induces a clone each iteration, since its value - // needs to be reset to all zeros. However, it should also be noted - // that the most fringe user of `arr`, the write inside the inner - // loop, does not dominate the bottom of the outer loop. Thus, we - // need to insert a phi in the bottom block of the outer loop to - // retrieve either the write, or `arr` before the inner loop. The - // general version of this problem requires the following solution. - // Our goal is to figure out which downstream user represents - // `object` at each block in the loop. We first assign each block - // containing a user the most downstream user it contains. 
Then, we - // create a dummy phi for every region (including the header) in the - // loop, which is the downstream user for that block. Then, every - // other block is assigned the downstream user of its single - // predecessor. This basically amounts to recreating SSA for - // `object` inside the loop. - let mut user_per_loop_bb = BTreeMap::new(); - let mut added_phis = BTreeMap::new(); - let mut top_phi = NodeID::new(0); - // Assign existing users. - for (user, ordering) in users.iter() { - let bb = bbs.0[user.idx()]; - if let Some(old_user) = user_per_loop_bb.get(&bb) - && users[old_user] > *ordering - { - } else { - user_per_loop_bb.insert(bb, *user); - } + // The fringe users may not dominate any predecessors of the loop + // header. The following is some Juno code that exposes this: + // + // fn problematic(a : size) -> i32 { + // for i = 0 to a { + // let arr : i32[1]; + // for j = 0 to a { + // arr[0] = 1; + // } + // } + // return 0; + // } + // + // Note that `arr` induces a clone each iteration, since its value + // needs to be reset to all zeros. However, it should also be noted + // that the most fringe user of `arr`, the write inside the inner + // loop, does not dominate the bottom of the outer loop. Thus, we + // need to insert a phi in the bottom block of the outer loop to + // retrieve either the write, or `arr` before the inner loop. The + // general version of this problem requires the following solution. + // Our goal is to figure out which downstream user represents + // `object` at each block in the loop. We first assign each block + // containing a user the most downstream user it contains. Then, we + // create a dummy phi for every region (including the header) in the + // loop, which is the downstream user for that block. Then, every + // other block is assigned the downstream user of its single + // predecessor. This basically amounts to recreating SSA for + // `object` inside the loop. + let mut user_per_loop_bb = BTreeMap::new(); + let mut added_phis = BTreeMap::new(); + let mut top_phi = NodeID::new(0); + // Assign existing users. + for (user, ordering) in users.iter() { + let bb = bbs.0[user.idx()]; + if let Some(old_user) = user_per_loop_bb.get(&bb) + && users[old_user] > *ordering + { + } else { + user_per_loop_bb.insert(bb, *user); } - // Assign dummy phis. - for bb in loops.nodes_in_loop(top_block) { - if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) - && editor.func().nodes[bb.idx()].is_region() - { - let success = editor.edit(|mut edit| { - let phi_node = edit.add_node(Node::Phi { - control: bb, - data: empty().collect(), - }); - if bb != top_block || !user_per_loop_bb.contains_key(&bb) { - user_per_loop_bb.insert(bb, phi_node); - } - if bb == top_block { - top_phi = phi_node; - } - added_phis.insert(phi_node, bb); - Ok(edit) + } + // Assign dummy phis. + for bb in loops.nodes_in_loop(top_block) { + if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) + && editor.func().nodes[bb.idx()].is_region() + { + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: empty().collect(), }); - assert!(success); - } + if bb != top_block || !user_per_loop_bb.contains_key(&bb) { + user_per_loop_bb.insert(bb, phi_node); + } + if bb == top_block { + top_phi = phi_node; + } + added_phis.insert(phi_node, bb); + Ok(edit) + }); + assert!(success); } - // Assign users for the rest of the blocks. 
- for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { - if !user_per_loop_bb.contains_key(&bb) { - assert!(control_subgraph.preds(*bb).count() == 1); - user_per_loop_bb.insert( - *bb, - user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], - ); - } + } + // Assign users for the rest of the blocks. + for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { + if !user_per_loop_bb.contains_key(&bb) { + assert!(control_subgraph.preds(*bb).count() == 1); + user_per_loop_bb.insert( + *bb, + user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], + ); } + } - // Induce the clone. - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); + // Induce the clone. + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Create the phis. - let mut phi_map = BTreeMap::new(); - let mut real_phis = BTreeSet::new(); - for (dummy, bb) in added_phis { - let real = edit.add_node(Node::Phi { - control: bb, - data: control_subgraph - .preds(bb) - .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) - .collect(), - }); - phi_map.insert(dummy, real); - real_phis.insert(real); - } - - // Create the clone into the phi. - let real_top_phi = phi_map[&top_phi]; - let clone_node = edit.add_node(Node::Write { - collect: real_top_phi, - data: object, - indices: vec![].into_boxed_slice(), + // Create the phis. + let mut phi_map = BTreeMap::new(); + let mut real_phis = BTreeSet::new(); + for (dummy, bb) in added_phis { + let real = edit.add_node(Node::Phi { + control: bb, + data: control_subgraph + .preds(bb) + .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) + .collect(), }); + phi_map.insert(dummy, real); + real_phis.insert(real); + } - // Make users use the cloned object. - edit = edit.replace_all_uses_where(object, clone_node, |id| { - id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) - })?; + // Create the clone into the phi. + let real_top_phi = phi_map[&top_phi]; + let clone_node = edit.add_node(Node::Write { + collect: real_top_phi, + data: object, + indices: vec![].into_boxed_slice(), + }); - // Get rid of the dummy phis. - for (dummy, real) in phi_map { - edit = edit.replace_all_uses(dummy, real)?; - edit = edit.delete_node(dummy)?; - } + // Make users use the cloned object. + edit = edit.replace_all_uses_where(object, clone_node, |id| { + id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) + })?; - // Make phis use the clone instead of the top phi. - edit = - edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?; + // Get rid of the dummy phis. + for (dummy, real) in phi_map { + edit = edit.replace_all_uses(dummy, real)?; + edit = edit.delete_node(dummy)?; + } - Ok(edit) - }); - assert!(success); + // Make phis use the clone instead of the top phi. + edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node) + }); + assert!(success); - // De-duplicate phis. - gvn(editor, false); + // De-duplicate phis. + gvn(editor, false); - // Get rid of unused phis. - dce(editor); + // Get rid of unused phis. + dce(editor); - // Simplify phis. 
- phi_elim(editor); - } - }; + // Simplify phis. + phi_elim(editor); + } +} - // Now that we've computed the used collections dataflow analysis, use the - // results to materialize a clone whenever a node attempts to use an already - // used node. As soon as any clone is found, return since that clone needs - // to get placed before other clones can be discovered. Traverse blocks in - // postorder so that clones inside loops are discovered before loop-induced - // clones. +/* + * Helper function to analyze lattice values at each program point and find + * multiple dynamic users of a single write. Return as soon as any clone is + * found. + */ +fn find_clones( + editor: &mut FunctionEditor, + super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>, + lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>, + rev_po: &Vec<NodeID>, + typing: &Vec<TypeID>, + control_subgraph: &Subgraph, + dom: &DomTree, + loops: &LoopTree, + objects: &CollectionObjects, + bb_to_prefix_sum: &Vec<usize>, + bbs: &BasicBlocks, +) -> bool { + let nodes = &editor.func().nodes; + let func_id = editor.func_id(); for bb in rev_po.iter().rev() { let insts = &bbs.1[bb.idx()]; // Accumulate predecessor bottom used sets for phis. Phis are special in @@ -955,7 +1055,21 @@ fn materialize_clones( bottom.clone() }); if bottom.contains_key(&arg) { - induce_clone(*arg, *inst, bottom); + induce_clone( + editor, + *arg, + *inst, + bottom, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } else { // Subsequent phis using `arg` along the same @@ -971,11 +1085,39 @@ fn materialize_clones( third, } => { if value.contains_key(&second) { - induce_clone(second, *inst, &value); + induce_clone( + editor, + second, + *inst, + &value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } if value.contains_key(&third) { - induce_clone(third, *inst, &value); + induce_clone( + editor, + third, + *inst, + &value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } } @@ -985,7 +1127,21 @@ fn materialize_clones( reduct: _, } => { if value.contains_key(&init) { - induce_clone(init, *inst, &value); + induce_clone( + editor, + init, + *inst, + &value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } } @@ -994,7 +1150,21 @@ fn materialize_clones( indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { if value.contains_key(&collect) { - induce_clone(collect, *inst, &value); + induce_clone( + editor, + collect, + *inst, + &value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } } @@ -1004,7 +1174,21 @@ fn materialize_clones( indices: _, } => { if value.contains_key(&collect) { - induce_clone(collect, *inst, &value); + induce_clone( + editor, + collect, + *inst, + &value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } } @@ -1025,7 +1209,21 @@ fn materialize_clones( .unwrap_or(false) && value.contains_key(arg) { - induce_clone(*arg, *inst, value); + induce_clone( + editor, + *arg, + *inst, + value, + super_value, + lattice, + rev_po, + typing, + control_subgraph, + dom, + loops, + bb_to_prefix_sum, + bbs, + ); return true; } } diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn 
b/juno_samples/implicit_clone/src/implicit_clone.jn index d06a64981db4e74f9441c3db5e0b6220d4729dc0..67bdd44a68de51a9acd1a242c0b9bb40ad983fcd 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -64,16 +64,32 @@ fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 { fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 { let x = 0; for i = 0 to 3 { - let arr : i32[1]; + let arr1 : i32[1]; + let arr2 : i32[1]; if a == b { - arr[0] = 6; + arr1[0] = 6; } else { - arr[0] = 9; + arr2[0] = 9; } + arr1[0] = 2; for j = 0 to 4 { - arr[0] += 1; + arr2[0] += 1; + } + x += arr2[0]; + } + return x; +} + +#[entry] +fn tricky3_loop_implicit_clone(a : usize, b : usize) -> usize { + let x = 0; + for i = 0 to b { + let arr1 : usize[10]; + let arr2 : usize[10]; + arr1[1] = 1; + for kk = 0 to 10 { + arr2[kk] += arr1[kk]; } - x += arr[0]; } return x; } diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index c0adaaef28c84278b585e3ecf27014217e0ccaf7..a46a67280c96aade3a056fe594443122972394e2 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ b/juno_samples/implicit_clone/src/main.rs @@ -27,6 +27,10 @@ fn main() { println!("{}", output); assert_eq!(output, 39); + let output = tricky3_loop_implicit_clone(5, 7).await; + println!("{}", output); + assert_eq!(output, 0); + let output = no_implicit_clone(4).await; println!("{}", output); assert_eq!(output, 13);