rarbore2 · rarbore2
--- a/hercules_opt/src/legalize_reference_semantics.rs

+ 528

− 331
+++ b/hercules_opt/src/legalize_reference_semantics.rs

+ 528

− 331
 @@ -540,10 +540,108 @@ fn materialize_clones(
 @@ -540,10 +540,108 @@ fn materialize_clones(
    objects: &CollectionObjects,
    bbs: &BasicBlocks,
 ) -> bool {
-    // First, run dataflow analysis to figure out which access to collections
+    let rev_po = control_subgraph.rev_po(NodeID::new(0));
-    // induce clones. This dataflow analysis depends on basic block assignments
+    let mut total_num_pts = 0;
-    // and is more analogous to standard dataflow analysis in CFG + SSA IRs.
+    let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
-    // This is the only place this form is used, so just hardcode it here.
+    for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
+        if idx == bb.idx() {
+            bb_to_prefix_sum[idx] = total_num_pts;
+            total_num_pts += insts.len() + 1;
+        }
+    }
+    // Calculate two lattices - one that includes back edges, and one that
+    // doesn't. We want to handle simple clones before loop induced clones, so
+    // we first materialize clones based on the no-back-edges lattice, and hten
+    // based on the full lattice.
+    let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
+        vec![BTreeMap::new(); total_num_pts];
+    used_collections_dataflow(
+        editor,
+        &mut no_back_edge_lattice,
+        &rev_po,
+        &bb_to_prefix_sum,
+        control_subgraph,
+        objects,
+        bbs,
+    );
+    let mut super_value = BTreeMap::new();
+    if find_clones(
+        editor,
+        &super_value,
+        &no_back_edge_lattice,
+        &rev_po,
+        &typing,
+        control_subgraph,
+        dom,
+        loops,
+        objects,
+        &bb_to_prefix_sum,
+        bbs,
+    ) {
+        return true;
+    }
+    // After inducing simple clones, calculate the full lattice and materialize
+    // any loop induced clones.
+    let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
+    loop {
+        let changed = used_collections_dataflow(
+            editor,
+            &mut lattice,
+            &rev_po,
+            &bb_to_prefix_sum,
+            control_subgraph,
+            objects,
+            bbs,
+        );
+        if !changed {
+            break;
+        }
+    }
+    for value in lattice.iter() {
+        meet(&mut super_value, value);
+    }
+    find_clones(
+        editor,
+        &super_value,
+        &lattice,
+        &rev_po,
+        &typing,
+        control_subgraph,
+        dom,
+        loops,
+        objects,
+        &bb_to_prefix_sum,
+        bbs,
+    )
+}
+fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
+    for (used, users) in right.into_iter() {
+        left.entry(*used).or_default().extend(users.into_iter());
+    }
+}
+/*
+ * Helper function to run a single iteration of the used collections dataflow
+ * analysis. Returns whether the lattice was changed. The lattice maps each
+ * program point to a set of used values and their possible users. Top is that
+ * no nodes are used yet.
+ */
+fn used_collections_dataflow(
+    editor: &FunctionEditor,
+    lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
+    rev_po: &Vec<NodeID>,
+    bb_to_prefix_sum: &Vec<usize>,
+    control_subgraph: &Subgraph,
+    objects: &CollectionObjects,
+    bbs: &BasicBlocks,
+) -> bool {
+    // Run dataflow analysis to figure out which accesses to collections induce
+    // clones. This dataflow analysis depends on basic block assignments and is
+    // more analogous to standard dataflow analysis in CFG + SSA IRs. This is
+    // the only place this form is used, so just hardcode it here.
    //
    // This forward dataflow analysis tracks which collections are used at each
    // program point, and by what user nodes. Collections are referred to using
 @@ -576,363 +674,364 @@ fn materialize_clones(
 @@ -576,363 +674,364 @@ fn materialize_clones(
    // "sub-view" of the same collection. This does not include reads that "end"
    // (most reads, some calls, the `data` input of a write). This analysis does
    // not consider parallel mutations in fork-joins.
-    let rev_po = control_subgraph.rev_po(NodeID::new(0));
-    let mut total_num_pts = 0;
-    let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
-    for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
-        if idx == bb.idx() {
-            bb_to_prefix_sum[idx] = total_num_pts;
-            total_num_pts += insts.len() + 1;
-        }
-    }
-    // Lattice maps each program point to a set of used values and their
-    // possible users. Top is that no nodes are used yet.
    let nodes = &editor.func().nodes;
    let func_id = editor.func_id();
-    let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>,
+    let mut changed = false;
-                right: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
-        for (used, users) in right.into_iter() {
-            left.entry(*used).or_default().extend(users.into_iter());
-        }
-    };
-    let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
-    loop {
-        let mut changed = false;
-        for bb in rev_po.iter() {
+    for bb in rev_po.iter() {
-            // The lattice value of the first point is the meet of the
+        // The lattice value of the first point is the meet of the
-            // predecessor terminating lattice values.
+        // predecessor terminating lattice values.
-            let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
+        let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
-            let mut new_top_value = BTreeMap::new();
+        let mut new_top_value = BTreeMap::new();
-            // Clearing `top_value` is not necessary since used nodes are never
+        // Clearing `top_value` is not necessary since used nodes are never
-            // removed from lattice values, only added.
+        // removed from lattice values, only added.
-            for pred in control_subgraph.preds(*bb) {
+        for pred in control_subgraph.preds(*bb) {
-                let last_pt = bbs.1[pred.idx()].len();
+            let last_pt = bbs.1[pred.idx()].len();
-                meet(
+            meet(
-                    &mut new_top_value,
+                &mut new_top_value,
-                    &lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
+                &lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
-                );
+            );
-            }
+        }
-            changed |= *old_top_value != new_top_value;
+        changed |= *old_top_value != new_top_value;
-            lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
+        lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
-            // The lattice value of following points are determined by their
+        // The lattice value of following points are determined by their
-            // immediate preceding instructions.
+        // immediate preceding instructions.
-            let insts = &bbs.1[bb.idx()];
+        let insts = &bbs.1[bb.idx()];
-            for (prev_pt, inst) in insts.iter().enumerate() {
+        for (prev_pt, inst) in insts.iter().enumerate() {
-                let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
+            let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
-                let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
+            let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
-                let mut new_value = prev_value.clone();
+            let mut new_value = prev_value.clone();
-                match nodes[inst.idx()] {
+            match nodes[inst.idx()] {
-                    Node::Phi {
+                Node::Phi {
-                        control: _,
+                    control: _,
-                        ref data,
+                    ref data,
-                    } if !objects[&func_id].objects(*inst).is_empty() => {
+                } if !objects[&func_id].objects(*inst).is_empty() => {
-                        for elem in data {
+                    for elem in data {
-                            new_value.entry(*elem).or_default().insert(*inst);
+                        new_value.entry(*elem).or_default().insert(*inst);
-                        }
-                        new_value.remove(inst);
-                    }
-                    Node::Ternary {
-                        op: TernaryOperator::Select,
-                        first: _,
-                        second,
-                        third,
-                    }
-                    | Node::Reduce {
-                        control: _,
-                        init: second,
-                        reduct: third,
-                    } => {
-                        if !objects[&func_id].objects(*inst).is_empty() {
-                            new_value.entry(second).or_default().insert(*inst);
-                            new_value.entry(third).or_default().insert(*inst);
-                            new_value.remove(inst);
-                        }
-                    }
-                    Node::Read {
-                        collect,
-                        indices: _,
-                    } if !objects[&func_id].objects(*inst).is_empty() => {
-                        new_value.entry(collect).or_default().insert(*inst);
-                        new_value.remove(inst);
-                    }
-                    Node::Write {
-                        collect,
-                        data: _,
-                        indices: _,
-                    } => {
-                        new_value.entry(collect).or_default().insert(*inst);
-                        new_value.remove(inst);
                    }
-                    Node::Call {
+                    new_value.remove(inst);
-                        control: _,
+                }
-                        function: callee,
+                Node::Ternary {
-                        dynamic_constants: _,
+                    op: TernaryOperator::Select,
-                        ref args,
+                    first: _,
-                    } => {
+                    second,
-                        let callee_objects = &objects[&callee];
+                    third,
-                        for (param_idx, arg) in args.into_iter().enumerate() {
+                }
-                            if callee_objects
+                | Node::Reduce {
-                                .param_to_object(param_idx)
+                    control: _,
-                                .map(|object| {
+                    init: second,
-                                    callee_objects.is_mutated(object)
+                    reduct: third,
-                                        || callee_objects.returned_objects().contains(&object)
+                } => {
-                                })
+                    if !objects[&func_id].objects(*inst).is_empty() {
-                                .unwrap_or(false)
+                        new_value.entry(second).or_default().insert(*inst);
-                            {
+                        new_value.entry(third).or_default().insert(*inst);
-                                new_value.entry(*arg).or_default().insert(*inst);
-                            }
-                        }
                        new_value.remove(inst);
                    }
-                    _ => {}
                }
-                changed |= *old_value != new_value;
+                Node::Read {
-                lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
+                    collect,
-            }
+                    indices: _,
+                } if !objects[&func_id].objects(*inst).is_empty() => {
-            // Handle reduces in this block specially at the very end.
+                    new_value.entry(collect).or_default().insert(*inst);
-            let last_pt = insts.len();
+                    new_value.remove(inst);
-            let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
+                }
-            let mut new_bottom_value = old_bottom_value.clone();
+                Node::Write {
-            for inst in insts.iter() {
+                    collect,
-                if let Node::Reduce {
+                    data: _,
+                    indices: _,
+                } => {
+                    new_value.entry(collect).or_default().insert(*inst);
+                    new_value.remove(inst);
+                }
+                Node::Call {
                    control: _,
-                    init: _,
+                    function: callee,
-                    reduct,
+                    dynamic_constants: _,
-                } = nodes[inst.idx()]
+                    ref args,
-                {
+                } => {
-                    assert!(
+                    let callee_objects = &objects[&callee];
-                        new_bottom_value.contains_key(&reduct),
+                    for (param_idx, arg) in args.into_iter().enumerate() {
-                        "PANIC: Can't handle clones inside a reduction cycle currently."
+                        if callee_objects
-                    );
+                            .param_to_object(param_idx)
-                    new_bottom_value.remove(inst);
+                            .map(|object| {
+                                callee_objects.is_mutated(object)
+                                    || callee_objects.returned_objects().contains(&object)
+                            })
+                            .unwrap_or(false)
+                        {
+                            new_value.entry(*arg).or_default().insert(*inst);
+                        }
+                    }
+                    new_value.remove(inst);
                }
+                _ => {}
            }
-            changed |= *old_bottom_value != new_bottom_value;
+            changed |= *old_value != new_value;
-            lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
+            lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
        }
-        if !changed {
+        // Handle reduces in this block specially at the very end.
-            break;
+        let last_pt = insts.len();
+        let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
+        let mut new_bottom_value = old_bottom_value.clone();
+        for inst in insts.iter() {
+            if let Node::Reduce {
+                control: _,
+                init: _,
+                reduct,
+            } = nodes[inst.idx()]
+            {
+                assert!(
+                    new_bottom_value.contains_key(&reduct),
+                    "PANIC: Can't handle clones inside a reduction cycle currently."
+                );
+                new_bottom_value.remove(inst);
+            }
        }
-    }
+        changed |= *old_bottom_value != new_bottom_value;
-    let mut super_value = BTreeMap::new();
+        lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
-    for value in lattice.iter() {
-        meet(&mut super_value, value);
    }
-    // Helper to induce a clone when an implicit clone is identified.
+    changed
-    let nodes = nodes.clone();
+}
-    let mut induce_clone = |object: NodeID,
-                            user: NodeID,
-                            value: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
-        // If `user` already used `object` and tries to use it again, then the
-        // clone is a "loop induced" clone. Otherwise, it's a simple clone.
-        if !value[&object].contains(&user) {
-            let success = editor.edit(|mut edit| {
-                // Create the constant collection object for allocation.
-                let object_ty = typing[object.idx()];
-                let object_cons = edit.add_zero_constant(object_ty);
-                let cons_node = edit.add_node(Node::Constant { id: object_cons });
-                // Create the clone into the new constant collection.
+/*
-                let clone_node = edit.add_node(Node::Write {
+ * Helper function to induce a clone once an object with multiple users has been
-                    collect: cons_node,
+ * found.
-                    data: object,
+ */
-                    indices: vec![].into_boxed_slice(),
+fn induce_clone(
-                });
+    editor: &mut FunctionEditor,
+    object: NodeID,
+    user: NodeID,
+    value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
+    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
+    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
+    rev_po: &Vec<NodeID>,
+    typing: &Vec<TypeID>,
+    control_subgraph: &Subgraph,
+    dom: &DomTree,
+    loops: &LoopTree,
+    bb_to_prefix_sum: &Vec<usize>,
+    bbs: &BasicBlocks,
+) {
+    // If `user` already used `object` and tries to use it again, then the
+    // clone is a "loop induced" clone. Otherwise, it's a simple clone.
+    if !value[&object].contains(&user) {
+        let success = editor.edit(|mut edit| {
+            // Create the constant collection object for allocation.
+            let object_ty = typing[object.idx()];
+            let object_cons = edit.add_zero_constant(object_ty);
+            let cons_node = edit.add_node(Node::Constant { id: object_cons });
-                // Make user use the cloned object.
+            // Create the clone into the new constant collection.
-                edit.replace_all_uses_where(object, clone_node, |id| *id == user)
+            let clone_node = edit.add_node(Node::Write {
-            });
+                collect: cons_node,
-            assert!(success);
+                data: object,
-        } else {
+                indices: vec![].into_boxed_slice(),
-            // Figure out where to place that phi. This is the deepest
-            // loop header where `user` is responsible for making `object` used
-            // used at the top of the block, and the block dominates the block
-            // containing `user`. If `user` is a phi, then the region it's
-            // attached to is excluded from eligibility.
-            let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
-                lattice[bb_to_prefix_sum[bb.idx()]]
-                    .get(&object)
-                    .unwrap_or(&BTreeSet::new())
-                    .contains(&user)
-                    && dom.does_dom(*bb, bbs.0[user.idx()])
-                    && loops.contains(*bb)
-                    && loops.is_in_loop(*bb, bbs.0[user.idx()])
-                    && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
            });
-            let top_block = eligible_blocks
-                .max_by_key(|bb| loops.nesting(*bb).unwrap())
-                .unwrap();
-            assert!(editor.func().nodes[top_block.idx()].is_region());
-            // Figure out the users of `object` that we need to phi back
+            // Make user use the cloned object.
-            // upwards. Assign each user a number indicating how far down the
+            edit.replace_all_uses_where(object, clone_node, |id| *id == user)
-            // user chain it is, higher is farther down. This is used for
+        });
-            // picking the most downstream user later.
+        assert!(success);
-            let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
+    } else {
-            let mut workset: BTreeSet<NodeID> = BTreeSet::new();
+        // Figure out where to place that phi. This is the deepest
-            workset.insert(object);
+        // loop header where `user` is responsible for making `object` used
-            let mut chain_ordering = 1;
+        // used at the top of the block, and the block dominates the block
-            while let Some(pop) = workset.pop_first() {
+        // containing `user`. If `user` is a phi, then the region it's
-                let iterated_users: BTreeSet<_> = super_value
+        // attached to is excluded from eligibility.
-                    .get(&pop)
+        let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
-                    .map(|users| users.into_iter())
+            lattice[bb_to_prefix_sum[bb.idx()]]
-                    .into_iter()
+                .get(&object)
-                    .flatten()
+                .unwrap_or(&BTreeSet::new())
-                    .map(|id| *id)
+                .contains(&user)
-                    .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
+                && dom.does_dom(*bb, bbs.0[user.idx()])
-                    .collect();
+                && loops.contains(*bb)
-                workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
+                && loops.is_in_loop(*bb, bbs.0[user.idx()])
-                for user in iterated_users {
+                && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
-                    *users.entry(user).or_default() = chain_ordering;
+        });
-                    chain_ordering += 1;
+        let top_block = eligible_blocks
-                }
+            .max_by_key(|bb| loops.nesting(*bb).unwrap())
+            .unwrap();
+        assert!(editor.func().nodes[top_block.idx()].is_region());
+        // Figure out the users of `object` that we need to phi back
+        // upwards. Assign each user a number indicating how far down the
+        // user chain it is, higher is farther down. This is used for
+        // picking the most downstream user later.
+        let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
+        let mut workset: BTreeSet<NodeID> = BTreeSet::new();
+        workset.insert(object);
+        let mut chain_ordering = 1;
+        assert!(!super_value.is_empty());
+        while let Some(pop) = workset.pop_first() {
+            let iterated_users: BTreeSet<_> = super_value
+                .get(&pop)
+                .map(|users| users.into_iter())
+                .into_iter()
+                .flatten()
+                .map(|id| *id)
+                .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
+                .collect();
+            workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
+            for user in iterated_users {
+                *users.entry(user).or_default() = chain_ordering;
+                chain_ordering += 1;
            }
+        }
-            // The fringe users may not dominate any predecessors of the loop
+        // The fringe users may not dominate any predecessors of the loop
-            // header. The following is some Juno code that exposes this:
+        // header. The following is some Juno code that exposes this:
-            //
+        //
-            // fn problematic(a : size) -> i32 {
+        // fn problematic(a : size) -> i32 {
-            //   for i = 0 to a {
+        //   for i = 0 to a {
-            //     let arr : i32[1];
+        //     let arr : i32[1];
-            //     for j = 0 to a {
+        //     for j = 0 to a {
-            //       arr[0] = 1;
+        //       arr[0] = 1;
-            //     }
+        //     }
-            //   }
+        //   }
-            //   return 0;
+        //   return 0;
-            // }
+        // }
-            //
+        //
-            // Note that `arr` induces a clone each iteration, since its value
+        // Note that `arr` induces a clone each iteration, since its value
-            // needs to be reset to all zeros. However, it should also be noted
+        // needs to be reset to all zeros. However, it should also be noted
-            // that the most fringe user of `arr`, the write inside the inner
+        // that the most fringe user of `arr`, the write inside the inner
-            // loop, does not dominate the bottom of the outer loop. Thus, we
+        // loop, does not dominate the bottom of the outer loop. Thus, we
-            // need to insert a phi in the bottom block of the outer loop to
+        // need to insert a phi in the bottom block of the outer loop to
-            // retrieve either the write, or `arr` before the inner loop. The
+        // retrieve either the write, or `arr` before the inner loop. The
-            // general version of this problem requires the following solution.
+        // general version of this problem requires the following solution.
-            // Our goal is to figure out which downstream user represents
+        // Our goal is to figure out which downstream user represents
-            // `object` at each block in the loop. We first assign each block
+        // `object` at each block in the loop. We first assign each block
-            // containing a user the most downstream user it contains. Then, we
+        // containing a user the most downstream user it contains. Then, we
-            // create a dummy phi for every region (including the header) in the
+        // create a dummy phi for every region (including the header) in the
-            // loop, which is the downstream user for that block. Then, every
+        // loop, which is the downstream user for that block. Then, every
-            // other block is assigned the downstream user of its single
+        // other block is assigned the downstream user of its single
-            // predecessor. This basically amounts to recreating SSA for
+        // predecessor. This basically amounts to recreating SSA for
-            // `object` inside the loop.
+        // `object` inside the loop.
-            let mut user_per_loop_bb = BTreeMap::new();
+        let mut user_per_loop_bb = BTreeMap::new();
-            let mut added_phis = BTreeMap::new();
+        let mut added_phis = BTreeMap::new();
-            let mut top_phi = NodeID::new(0);
+        let mut top_phi = NodeID::new(0);
-            // Assign existing users.
+        // Assign existing users.
-            for (user, ordering) in users.iter() {
+        for (user, ordering) in users.iter() {
-                let bb = bbs.0[user.idx()];
+            let bb = bbs.0[user.idx()];
-                if let Some(old_user) = user_per_loop_bb.get(&bb)
+            if let Some(old_user) = user_per_loop_bb.get(&bb)
-                    && users[old_user] > *ordering
+                && users[old_user] > *ordering
-                {
+            {
-                } else {
+            } else {
-                    user_per_loop_bb.insert(bb, *user);
+                user_per_loop_bb.insert(bb, *user);
-                }
            }
-            // Assign dummy phis.
+        }
-            for bb in loops.nodes_in_loop(top_block) {
+        // Assign dummy phis.
-                if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
+        for bb in loops.nodes_in_loop(top_block) {
-                    && editor.func().nodes[bb.idx()].is_region()
+            if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
-                {
+                && editor.func().nodes[bb.idx()].is_region()
-                    let success = editor.edit(|mut edit| {
+            {
-                        let phi_node = edit.add_node(Node::Phi {
+                let success = editor.edit(|mut edit| {
-                            control: bb,
+                    let phi_node = edit.add_node(Node::Phi {
-                            data: empty().collect(),
+                        control: bb,
-                        });
+                        data: empty().collect(),
-                        if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
-                            user_per_loop_bb.insert(bb, phi_node);
-                        }
-                        if bb == top_block {
-                            top_phi = phi_node;
-                        }
-                        added_phis.insert(phi_node, bb);
-                        Ok(edit)
                    });
-                    assert!(success);
+                    if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
-                }
+                        user_per_loop_bb.insert(bb, phi_node);
+                    }
+                    if bb == top_block {
+                        top_phi = phi_node;
+                    }
+                    added_phis.insert(phi_node, bb);
+                    Ok(edit)
+                });
+                assert!(success);
            }
-            // Assign users for the rest of the blocks.
+        }
-            for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
+        // Assign users for the rest of the blocks.
-                if !user_per_loop_bb.contains_key(&bb) {
+        for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
-                    assert!(control_subgraph.preds(*bb).count() == 1);
+            if !user_per_loop_bb.contains_key(&bb) {
-                    user_per_loop_bb.insert(
+                assert!(control_subgraph.preds(*bb).count() == 1);
-                        *bb,
+                user_per_loop_bb.insert(
-                        user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
+                    *bb,
-                    );
+                    user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
-                }
+                );
            }
+        }
-            // Induce the clone.
+        // Induce the clone.
-            let success = editor.edit(|mut edit| {
+        let success = editor.edit(|mut edit| {
-                // Create the constant collection object for allocation.
+            // Create the constant collection object for allocation.
-                let object_ty = typing[object.idx()];
+            let object_ty = typing[object.idx()];
-                let object_cons = edit.add_zero_constant(object_ty);
+            let object_cons = edit.add_zero_constant(object_ty);
-                let cons_node = edit.add_node(Node::Constant { id: object_cons });
+            let cons_node = edit.add_node(Node::Constant { id: object_cons });
-                // Create the phis.
+            // Create the phis.
-                let mut phi_map = BTreeMap::new();
+            let mut phi_map = BTreeMap::new();
-                let mut real_phis = BTreeSet::new();
+            let mut real_phis = BTreeSet::new();
-                for (dummy, bb) in added_phis {
+            for (dummy, bb) in added_phis {
-                    let real = edit.add_node(Node::Phi {
+                let real = edit.add_node(Node::Phi {
-                        control: bb,
+                    control: bb,
-                        data: control_subgraph
+                    data: control_subgraph
-                            .preds(bb)
+                        .preds(bb)
-                            .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
+                        .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
-                            .collect(),
+                        .collect(),
-                    });
-                    phi_map.insert(dummy, real);
-                    real_phis.insert(real);
-                }
-                // Create the clone into the phi.
-                let real_top_phi = phi_map[&top_phi];
-                let clone_node = edit.add_node(Node::Write {
-                    collect: real_top_phi,
-                    data: object,
-                    indices: vec![].into_boxed_slice(),
                });
+                phi_map.insert(dummy, real);
+                real_phis.insert(real);
+            }
-                // Make users use the cloned object.
+            // Create the clone into the phi.
-                edit = edit.replace_all_uses_where(object, clone_node, |id| {
+            let real_top_phi = phi_map[&top_phi];
-                    id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
+            let clone_node = edit.add_node(Node::Write {
-                })?;
+                collect: real_top_phi,
+                data: object,
+                indices: vec![].into_boxed_slice(),
+            });
-                // Get rid of the dummy phis.
+            // Make users use the cloned object.
-                for (dummy, real) in phi_map {
+            edit = edit.replace_all_uses_where(object, clone_node, |id| {
-                    edit = edit.replace_all_uses(dummy, real)?;
+                id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
-                    edit = edit.delete_node(dummy)?;
+            })?;
-                }
-                // Make phis use the clone instead of the top phi.
+            // Get rid of the dummy phis.
-                edit =
+            for (dummy, real) in phi_map {
-                    edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?;
+                edit = edit.replace_all_uses(dummy, real)?;
+                edit = edit.delete_node(dummy)?;
+            }
-                Ok(edit)
+            // Make phis use the clone instead of the top phi.
-            });
+            edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
-            assert!(success);
+        });
+        assert!(success);
-            // De-duplicate phis.
+        // De-duplicate phis.
-            gvn(editor, false);
+        gvn(editor, false);
-            // Get rid of unused phis.
+        // Get rid of unused phis.
-            dce(editor);
+        dce(editor);
-            // Simplify phis.
+        // Simplify phis.
-            phi_elim(editor);
+        phi_elim(editor);
-        }
+    }
-    };
+}
-    // Now that we've computed the used collections dataflow analysis, use the
+/*
-    // results to materialize a clone whenever a node attempts to use an already
+ * Helper function to analyze lattice values at each program point and find
-    // used node. As soon as any clone is found, return since that clone needs
+ * multiple dynamic users of a single write. Return as soon as any clone is
-    // to get placed before other clones can be discovered. Traverse blocks in
+ * found.
-    // postorder so that clones inside loops are discovered before loop-induced
+ */
-    // clones.
+fn find_clones(
+    editor: &mut FunctionEditor,
+    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
+    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
+    rev_po: &Vec<NodeID>,
+    typing: &Vec<TypeID>,
+    control_subgraph: &Subgraph,
+    dom: &DomTree,
+    loops: &LoopTree,
+    objects: &CollectionObjects,
+    bb_to_prefix_sum: &Vec<usize>,
+    bbs: &BasicBlocks,
+) -> bool {
+    let nodes = &editor.func().nodes;
+    let func_id = editor.func_id();
    for bb in rev_po.iter().rev() {
        let insts = &bbs.1[bb.idx()];
        // Accumulate predecessor bottom used sets for phis. Phis are special in
 @@ -956,7 +1055,21 @@ fn materialize_clones(
 @@ -956,7 +1055,21 @@ fn materialize_clones(
                            bottom.clone()
                        });
                        if bottom.contains_key(&arg) {
-                            induce_clone(*arg, *inst, bottom);
+                            induce_clone(
+                                editor,
+                                *arg,
+                                *inst,
+                                bottom,
+                                super_value,
+                                lattice,
+                                rev_po,
+                                typing,
+                                control_subgraph,
+                                dom,
+                                loops,
+                                bb_to_prefix_sum,
+                                bbs,
+                            );
                            return true;
                        } else {
                            // Subsequent phis using `arg` along the same
 @@ -972,11 +1085,39 @@ fn materialize_clones(
 @@ -972,11 +1085,39 @@ fn materialize_clones(
                    third,
                } => {
                    if value.contains_key(&second) {
-                        induce_clone(second, *inst, &value);
+                        induce_clone(
+                            editor,
+                            second,
+                            *inst,
+                            &value,
+                            super_value,
+                            lattice,
+                            rev_po,
+                            typing,
+                            control_subgraph,
+                            dom,
+                            loops,
+                            bb_to_prefix_sum,
+                            bbs,
+                        );
                        return true;
                    }
                    if value.contains_key(&third) {
-                        induce_clone(third, *inst, &value);
+                        induce_clone(
+                            editor,
+                            third,
+                            *inst,
+                            &value,
+                            super_value,
+                            lattice,
+                            rev_po,
+                            typing,
+                            control_subgraph,
+                            dom,
+                            loops,
+                            bb_to_prefix_sum,
+                            bbs,
+                        );
                        return true;
                    }
                }
 @@ -986,7 +1127,21 @@ fn materialize_clones(
 @@ -986,7 +1127,21 @@ fn materialize_clones(
                    reduct: _,
                } => {
                    if value.contains_key(&init) {
-                        induce_clone(init, *inst, &value);
+                        induce_clone(
+                            editor,
+                            init,
+                            *inst,
+                            &value,
+                            super_value,
+                            lattice,
+                            rev_po,
+                            typing,
+                            control_subgraph,
+                            dom,
+                            loops,
+                            bb_to_prefix_sum,
+                            bbs,
+                        );
                        return true;
                    }
                }
 @@ -995,7 +1150,21 @@ fn materialize_clones(
 @@ -995,7 +1150,21 @@ fn materialize_clones(
                    indices: _,
                } if !objects[&func_id].objects(*inst).is_empty() => {
                    if value.contains_key(&collect) {
-                        induce_clone(collect, *inst, &value);
+                        induce_clone(
+                            editor,
+                            collect,
+                            *inst,
+                            &value,
+                            super_value,
+                            lattice,
+                            rev_po,
+                            typing,
+                            control_subgraph,
+                            dom,
+                            loops,
+                            bb_to_prefix_sum,
+                            bbs,
+                        );
                        return true;
                    }
                }
 @@ -1005,7 +1174,21 @@ fn materialize_clones(
 @@ -1005,7 +1174,21 @@ fn materialize_clones(
                    indices: _,
                } => {
                    if value.contains_key(&collect) {
-                        induce_clone(collect, *inst, &value);
+                        induce_clone(
+                            editor,
+                            collect,
+                            *inst,
+                            &value,
+                            super_value,
+                            lattice,
+                            rev_po,
+                            typing,
+                            control_subgraph,
+                            dom,
+                            loops,
+                            bb_to_prefix_sum,
+                            bbs,
+                        );
                        return true;
                    }
                }
 @@ -1026,7 +1209,21 @@ fn materialize_clones(
 @@ -1026,7 +1209,21 @@ fn materialize_clones(
                            .unwrap_or(false)
                            && value.contains_key(arg)
                        {
-                            induce_clone(*arg, *inst, value);
+                            induce_clone(
+                                editor,
+                                *arg,
+                                *inst,
+                                value,
+                                super_value,
+                                lattice,
+                                rev_po,
+                                typing,
+                                control_subgraph,
+                                dom,
+                                loops,
+                                bb_to_prefix_sum,
+                                bbs,
+                            );
                            return true;
                        }
                    }