diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
index 7c67af538b81946f67786a8133b9f670b7138f9e..415244043c304fd94e7b95cd4c3ace990bfdcbcb 100644
--- a/hercules_cg/src/cpu.rs
+++ b/hercules_cg/src/cpu.rs
@@ -272,7 +272,7 @@ impl<'a> CPUContext<'a> {
                             write!(body, "double {} to double", val)?
                         }
                     }
-                    _ => panic!("PANIC: Can't dynamically allocate memory for an aggregate type within a CPU function."),
+                    _ => panic!("PANIC: Can't dynamically allocate memory for an aggregate type within a CPU function ({:?} in {}).", id, self.function.name),
                 }
             }
             Node::DynamicConstant { id: dc_id } => {
diff --git a/hercules_opt/src/clone_elim.rs b/hercules_opt/src/clone_elim.rs
deleted file mode 100644
index 59507baca5731c94e8b63963e644792a50bf2ea7..0000000000000000000000000000000000000000
--- a/hercules_opt/src/clone_elim.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-extern crate hercules_ir;
-
-use std::collections::BTreeSet;
-
-use self::hercules_ir::ir::*;
-
-use crate::*;
-
-/*
- * Top level function to run clone elimination.
- */
-pub fn clone_elim(editor: &mut FunctionEditor) {
-    // Create workset (starts as all nodes).
-    let mut workset: BTreeSet<NodeID> = (0..editor.func().nodes.len()).map(NodeID::new).collect();
-
-    while let Some(work) = workset.pop_first() {
-        // Look for Write nodes with identical `collect` and `data` inputs.
-        let nodes = &editor.func().nodes;
-        if let Node::Write {
-            collect,
-            data,
-            ref indices,
-        } = nodes[work.idx()]
-            && nodes[collect.idx()] == nodes[data.idx()]
-        {
-            assert!(indices.is_empty());
-            editor.edit(|edit| edit.replace_all_uses(work, collect)?.delete_node(work));
-
-            // Removing this write may affect downstream writes.
-            workset.extend(editor.get_users(work));
-        }
-    }
-}
diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs
index 4ff08e6927103ed39d8025ab1fac7bc52d52984a..313596514216257797ea3aa0cd17af332f37cbf2 100644
--- a/hercules_opt/src/editor.rs
+++ b/hercules_opt/src/editor.rs
@@ -62,6 +62,7 @@ pub struct FunctionEdit<'a: 'b, 'b> {
     added_types: Vec<Type>,
     // Compute a def-use map entries iteratively.
     updated_def_use: BTreeMap<NodeID, HashSet<NodeID>>,
+    updated_param_types: Option<Vec<TypeID>>,
     updated_return_type: Option<TypeID>,
     // Keep track of which deleted and added node IDs directly correspond.
     sub_edits: Vec<(NodeID, NodeID)>,
@@ -113,6 +114,7 @@ impl<'a: 'b, 'b> FunctionEditor<'a> {
             added_dynamic_constants: Vec::new().into(),
             added_types: Vec::new().into(),
             updated_def_use: BTreeMap::new(),
+            updated_param_types: None,
             updated_return_type: None,
             sub_edits: vec![],
         };
@@ -130,6 +132,7 @@ impl<'a: 'b, 'b> FunctionEditor<'a> {
                 added_dynamic_constants,
                 added_types,
                 updated_def_use,
+                updated_param_types,
                 updated_return_type,
                 sub_edits,
             } = populated_edit;
@@ -206,7 +209,12 @@ impl<'a: 'b, 'b> FunctionEditor<'a> {
             editor_dynamic_constants.extend(added_dynamic_constants);
             editor_types.extend(added_types);
 
-            // Step 8: update return type if necessary.
+            // Step 8: update parameter types if necessary.
+            if let Some(param_types) = updated_param_types {
+                editor.function.param_types = param_types;
+            }
+
+            // Step 9: update return type if necessary.
             if let Some(return_type) = updated_return_type {
                 editor.function.return_type = return_type;
             }
@@ -580,6 +588,10 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
         }
     }
 
+    pub fn set_param_types(&mut self, tys: Vec<TypeID>) {
+        self.updated_param_types = Some(tys);
+    }
+
     pub fn set_return_type(&mut self, ty: TypeID) {
         self.updated_return_type = Some(ty);
     }
diff --git a/hercules_opt/src/float_collections.rs b/hercules_opt/src/float_collections.rs
new file mode 100644
index 0000000000000000000000000000000000000000..30df387598395f9182fd123024d14240510ce73a
--- /dev/null
+++ b/hercules_opt/src/float_collections.rs
@@ -0,0 +1,107 @@
+extern crate hercules_ir;
+
+use self::hercules_ir::*;
+
+use crate::*;
+
+/*
+ * Float collections constants out of device functions, where allocation isn't
+ * allowed.
+ */
+pub fn float_collections(
+    editors: &mut [FunctionEditor],
+    typing: &ModuleTyping,
+    callgraph: &CallGraph,
+    devices: &Vec<Device>,
+) {
+    let topo = callgraph.topo();
+    for to_float_id in topo {
+        // Collection constants float until reaching an AsyncRust function.
+        if devices[to_float_id.idx()] == Device::AsyncRust {
+            continue;
+        }
+
+        // Find the target constant nodes in the function.
+        let cons: Vec<(NodeID, Node)> = editors[to_float_id.idx()]
+            .func()
+            .nodes
+            .iter()
+            .enumerate()
+            .filter(|(_, node)| {
+                node.try_constant()
+                    .map(|cons_id| !editors[to_float_id.idx()].get_constant(cons_id).is_scalar())
+                    .unwrap_or(false)
+            })
+            .map(|(idx, node)| (NodeID::new(idx), node.clone()))
+            .collect();
+        if cons.is_empty() {
+            continue;
+        }
+
+        // Each constant node becomes a new parameter.
+        let mut new_param_types = editors[to_float_id.idx()].func().param_types.clone();
+        let old_num_params = new_param_types.len();
+        for (id, _) in cons.iter() {
+            new_param_types.push(typing[to_float_id.idx()][id.idx()]);
+        }
+        let success = editors[to_float_id.idx()].edit(|mut edit| {
+            for (idx, (id, _)) in cons.iter().enumerate() {
+                let param = edit.add_node(Node::Parameter {
+                    index: idx + old_num_params,
+                });
+                edit = edit.replace_all_uses(*id, param)?;
+                edit = edit.delete_node(*id)?;
+            }
+            edit.set_param_types(new_param_types);
+            Ok(edit)
+        });
+        if !success {
+            continue;
+        }
+
+        // Add constants in callers and pass them into calls.
+        for caller in callgraph.get_callers(to_float_id) {
+            let calls: Vec<(NodeID, Node)> = editors[caller.idx()]
+                .func()
+                .nodes
+                .iter()
+                .enumerate()
+                .filter(|(_, node)| {
+                    node.try_call()
+                        .map(|(_, callee, _, _)| callee == to_float_id)
+                        .unwrap_or(false)
+                })
+                .map(|(idx, node)| (NodeID::new(idx), node.clone()))
+                .collect();
+            let success = editors[caller.idx()].edit(|mut edit| {
+                let cons_ids: Vec<_> = cons
+                    .iter()
+                    .map(|(_, node)| edit.add_node(node.clone()))
+                    .collect();
+                for (id, node) in calls {
+                    let Node::Call {
+                        control,
+                        function,
+                        dynamic_constants,
+                        args,
+                    } = node
+                    else {
+                        panic!()
+                    };
+                    let mut args = Vec::from(args);
+                    args.extend(cons_ids.iter());
+                    let new_call = edit.add_node(Node::Call {
+                        control,
+                        function,
+                        dynamic_constants,
+                        args: args.into_boxed_slice(),
+                    });
+                    edit = edit.replace_all_uses(id, new_call)?;
+                    edit = edit.delete_node(id)?;
+                }
+                Ok(edit)
+            });
+            assert!(success);
+        }
+    }
+}
diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs
new file mode 100644
index 0000000000000000000000000000000000000000..76ce3fdfeced77b5663dc99cd76049ba13e1172c
--- /dev/null
+++ b/hercules_opt/src/gcm.rs
@@ -0,0 +1,888 @@
+extern crate bitvec;
+extern crate either;
+extern crate hercules_cg;
+extern crate hercules_ir;
+
+use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
+use std::iter::{empty, once, zip, FromIterator};
+
+use self::bitvec::prelude::*;
+use self::either::Either;
+
+use self::hercules_cg::*;
+use self::hercules_ir::*;
+
+use crate::*;
+
+/*
+ * Top level function to legalize the reference semantics of a Hercules IR
+ * function. Hercules IR is a value semantics representation, meaning that all
+ * program state is in the form of copyable values, and mutation takes place by
+ * making a new value that is a copy of the old value with some modification.
+ * This representation is extremely convenient for optimization, but is not good
+ * for code generation, where we need to generate code with references to get
+ * good performance. Hercules IR can alternatively be interpreted using
+ * reference semantics, where pointers to collection objects are passed around,
+ * read from, and written to. However, the value semantics and reference
+ * semantics interpretation of a Hercules IR function may not be equal - this
+ * pass transforms a Hercules IR function such that its new value semantics is
+ * the same as its old value semantics and that its new reference semantics is
+ * the same as its new value semantics. This pass returns a placement of nodes
+ * into ordered basic blocks, since the reference semantics of a function
+ * depends on the order of execution with respect to anti-dependencies. This
+ * is analogous to global code motion from the original sea of nodes paper.
+ *
+ * Our strategy for handling multiple mutating users of a collection is to treat
+ * the problem similar to register allocation; we perform a liveness analysis,
+ * spill constants into newly allocated constants, and read back the spilled
+ * contents when they are used after the first mutation. It's not obvious how
+ * many spills are needed upfront, and newly spilled constants may affect the
+ * liveness analysis result, so every spill restarts the process of checking for
+ * spills. Once no more spills are found, the process terminates. When a spill
+ * is found, the basic block assignments, and all the other analyses, are not
+ * necessarily valid anymore, so this function is called in a loop in pass.rs
+ * until no more spills are found.
+ */
+pub fn gcm(
+    editor: &mut FunctionEditor,
+    def_use: &ImmutableDefUseMap,
+    reverse_postorder: &Vec<NodeID>,
+    typing: &Vec<TypeID>,
+    control_subgraph: &Subgraph,
+    dom: &DomTree,
+    fork_join_map: &HashMap<NodeID, NodeID>,
+    loops: &LoopTree,
+    objects: &CollectionObjects,
+) -> Option<BasicBlocks> {
+    let bbs = basic_blocks(
+        editor.func(),
+        editor.func_id(),
+        def_use,
+        reverse_postorder,
+        control_subgraph,
+        dom,
+        loops,
+        fork_join_map,
+        objects,
+    );
+    if spill_clones(editor, typing, control_subgraph, objects, &bbs) {
+        None
+    } else {
+        Some(bbs)
+    }
+}
+
+/*
+ * Top level global code motion function. Assigns each data node to one of its
+ * immediate control use / user nodes, forming (unordered) basic blocks. Returns
+ * the control node / basic block each node is in. Takes in a partial
+ * partitioning that must be respected. Based on the schedule-early-schedule-
+ * late method from Cliff Click's PhD thesis. May fail if an anti-dependency
+ * edge can't be satisfied - in this case, a clone that has to be induced is
+ * returned instead.
+ */
+fn basic_blocks(
+    function: &Function,
+    func_id: FunctionID,
+    def_use: &ImmutableDefUseMap,
+    reverse_postorder: &Vec<NodeID>,
+    control_subgraph: &Subgraph,
+    dom: &DomTree,
+    loops: &LoopTree,
+    fork_join_map: &HashMap<NodeID, NodeID>,
+    objects: &CollectionObjects,
+) -> BasicBlocks {
+    let mut bbs: Vec<Option<NodeID>> = vec![None; function.nodes.len()];
+
+    // Step 1: assign the basic block locations of all nodes that must be in a
+    // specific block. This includes control nodes as well as some special data
+    // nodes, such as phis.
+    for idx in 0..function.nodes.len() {
+        match function.nodes[idx] {
+            Node::Phi { control, data: _ } => bbs[idx] = Some(control),
+            Node::ThreadID {
+                control,
+                dimension: _,
+            } => bbs[idx] = Some(control),
+            Node::Reduce {
+                control,
+                init: _,
+                reduct: _,
+            } => bbs[idx] = Some(control),
+            Node::Call {
+                control,
+                function: _,
+                dynamic_constants: _,
+                args: _,
+            } => bbs[idx] = Some(control),
+            Node::Parameter { index: _ } => bbs[idx] = Some(NodeID::new(0)),
+            Node::Constant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
+            Node::DynamicConstant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
+            _ if function.nodes[idx].is_control() => bbs[idx] = Some(NodeID::new(idx)),
+            _ => {}
+        }
+    }
+
+    // Step 2: schedule early. Place nodes in the earliest position they could
+    // go - use worklist to iterate nodes.
+    let mut schedule_early = bbs.clone();
+    let mut worklist = VecDeque::from(reverse_postorder.clone());
+    while let Some(id) = worklist.pop_front() {
+        if schedule_early[id.idx()].is_some() {
+            continue;
+        }
+
+        // For every use, check what block is its "schedule early" block. This
+        // node goes in the lowest block amongst those blocks.
+        let use_places: Option<Vec<NodeID>> = get_uses(&function.nodes[id.idx()])
+            .as_ref()
+            .into_iter()
+            .map(|id| *id)
+            .map(|id| schedule_early[id.idx()])
+            .collect();
+        if let Some(use_places) = use_places {
+            // If every use has been placed, we can place this node as the
+            // lowest place in the domtree that dominates all of the use places.
+            let lowest = dom.lowest_amongst(use_places.into_iter());
+            schedule_early[id.idx()] = Some(lowest);
+        } else {
+            // If not, then just push this node back on the worklist.
+            worklist.push_back(id);
+        }
+    }
+
+    // Step 3: find anti-dependence edges. An anti-dependence edge needs to be
+    // drawn between a collection reading node and a collection mutating node
+    // when the following conditions are true:
+    //
+    // 1: The reading and mutating nodes may involve the same collection.
+    // 2: The node producing the collection used by the reading node is in a
+    //    schedule early block that dominates the schedule early block of the
+    //    mutating node. The node producing the collection used by the reading
+    //    node may be an originator of a collection, phi or reduce, or mutator,
+    //    but not forwarding read - forwarding reads are collapsed, and the
+    //    bottom read is treated as reading from the transitive parent of the
+    //    forwarding read(s).
+    let mut antideps = BTreeSet::new();
+    for id in reverse_postorder.iter() {
+        // Find a terminating read node and the collections it reads.
+        let terminating_reads: BTreeSet<_> =
+            terminating_reads(function, func_id, *id, objects).collect();
+        if !terminating_reads.is_empty() {
+            // Walk forwarding reads to find anti-dependency roots.
+            let mut workset = terminating_reads.clone();
+            let mut roots = BTreeSet::new();
+            while let Some(pop) = workset.pop_first() {
+                let forwarded: BTreeSet<_> =
+                    forwarding_reads(function, func_id, pop, objects).collect();
+                if forwarded.is_empty() {
+                    roots.insert(pop);
+                } else {
+                    workset.extend(forwarded);
+                }
+            }
+
+            // For each root, find mutating nodes dominated by the root that
+            // modify an object read on any input of the current node (the
+            // terminating read).
+            // TODO: make this less outrageously inefficient.
+            let func_objects = &objects[&func_id];
+            for root in roots.iter() {
+                let root_early = schedule_early[root.idx()].unwrap();
+                let mut root_block_iterated_users: BTreeSet<NodeID> = BTreeSet::new();
+                let mut workset = BTreeSet::new();
+                workset.insert(*root);
+                while let Some(pop) = workset.pop_first() {
+                    let users = def_use.get_users(pop).into_iter().filter(|user| {
+                        !function.nodes[user.idx()].is_phi()
+                            && !function.nodes[user.idx()].is_reduce()
+                            && schedule_early[user.idx()].unwrap() == root_early
+                    });
+                    workset.extend(users.clone());
+                    root_block_iterated_users.extend(users);
+                }
+                let read_objs: BTreeSet<_> = terminating_reads
+                    .iter()
+                    .map(|read_use| func_objects.objects(*read_use).into_iter())
+                    .flatten()
+                    .map(|id| *id)
+                    .collect();
+                for mutator in reverse_postorder.iter() {
+                    let mutator_early = schedule_early[mutator.idx()].unwrap();
+                    if dom.does_dom(root_early, mutator_early)
+                        && (root_early != mutator_early
+                            || root_block_iterated_users.contains(&mutator))
+                        && mutating_objects(function, func_id, *mutator, objects)
+                            .any(|mutated| read_objs.contains(&mutated))
+                        && id != mutator
+                    {
+                        antideps.insert((*id, *mutator));
+                    }
+                }
+            }
+        }
+    }
+    let mut antideps_uses = vec![vec![]; function.nodes.len()];
+    let mut antideps_users = vec![vec![]; function.nodes.len()];
+    for (reader, mutator) in antideps.iter() {
+        antideps_uses[mutator.idx()].push(*reader);
+        antideps_users[reader.idx()].push(*mutator);
+    }
+
+    // Step 4: schedule late and pick each node's final position. Since the late
+    // schedule of each node depends on the final positions of its users, these
+    // two steps must be fused. Compute their latest position, then use the
+    // control dependent + shallow loop heuristic to actually place them. A
+    // placement might not necessarily be found due to anti-dependency edges.
+    // These are optional and not necessary to consider, but we do since obeying
+    // them can reduce the number of clones. If the worklist stops making
+    // progress, stop considering the anti-dependency edges.
+    let join_fork_map: HashMap<NodeID, NodeID> = fork_join_map
+        .into_iter()
+        .map(|(fork, join)| (*join, *fork))
+        .collect();
+    let mut worklist = VecDeque::from_iter(reverse_postorder.into_iter().map(|id| *id).rev());
+    let mut num_skip_iters = 0;
+    let mut consider_antidependencies = true;
+    while let Some(id) = worklist.pop_front() {
+        if num_skip_iters >= worklist.len() {
+            consider_antidependencies = false;
+        }
+
+        if bbs[id.idx()].is_some() {
+            num_skip_iters = 0;
+            continue;
+        }
+
+        // Calculate the least common ancestor of user blocks, a.k.a. the "late"
+        // schedule.
+        let calculate_lca = || -> Option<_> {
+            let mut lca = None;
+            // Helper to incrementally update the LCA.
+            let mut update_lca = |a| {
+                if let Some(acc) = lca {
+                    lca = Some(dom.least_common_ancestor(acc, a));
+                } else {
+                    lca = Some(a);
+                }
+            };
+
+            // For every user, consider where we need to be to directly dominate the
+            // user.
+            for user in def_use
+                .get_users(id)
+                .as_ref()
+                .into_iter()
+                .chain(if consider_antidependencies {
+                    Either::Left(antideps_users[id.idx()].iter())
+                } else {
+                    Either::Right(empty())
+                })
+                .map(|id| *id)
+            {
+                if let Node::Phi { control, data } = &function.nodes[user.idx()] {
+                    // For phis, we need to dominate the block jumping to the phi in
+                    // the slot that corresponds to our use.
+                    for (control, data) in
+                        zip(get_uses(&function.nodes[control.idx()]).as_ref(), data)
+                    {
+                        if id == *data {
+                            update_lca(*control);
+                        }
+                    }
+                } else if let Node::Reduce {
+                    control,
+                    init,
+                    reduct,
+                } = &function.nodes[user.idx()]
+                {
+                    // For reduces, we need to either dominate the block right
+                    // before the fork if we're the init input, or we need to
+                    // dominate the join if we're the reduct input.
+                    if id == *init {
+                        let before_fork = function.nodes[join_fork_map[control].idx()]
+                            .try_fork()
+                            .unwrap()
+                            .0;
+                        update_lca(before_fork);
+                    } else {
+                        assert_eq!(id, *reduct);
+                        update_lca(*control);
+                    }
+                } else {
+                    // For everything else, we just need to dominate the user.
+                    update_lca(bbs[user.idx()]?);
+                }
+            }
+
+            Some(lca)
+        };
+
+        // Check if all users have been placed. If one of them hasn't, then add
+        // this node back on to the worklist.
+        let Some(lca) = calculate_lca() else {
+            worklist.push_back(id);
+            num_skip_iters += 1;
+            continue;
+        };
+
+        // Look between the LCA and the schedule early location to place the
+        // node.
+        let schedule_early = schedule_early[id.idx()].unwrap();
+        let schedule_late = lca.unwrap_or(schedule_early);
+        let mut chain = dom
+            // If the node has no users, then it doesn't really matter where we
+            // place it - just place it at the early placement.
+            .chain(schedule_late, schedule_early);
+
+        if let Some(mut location) = chain.next() {
+            while let Some(control_node) = chain.next() {
+                // If the next node further up the dominator tree is in a shallower
+                // loop nest or if we can get out of a reduce loop when we don't
+                // need to be in one, place this data node in a higher-up location.
+                let old_nest = loops
+                    .header_of(location)
+                    .map(|header| loops.nesting(header).unwrap());
+                let new_nest = loops
+                    .header_of(control_node)
+                    .map(|header| loops.nesting(header).unwrap());
+                let shallower_nest = if let (Some(old_nest), Some(new_nest)) = (old_nest, new_nest)
+                {
+                    old_nest > new_nest
+                } else {
+                    // If the new location isn't a loop, its nesting level should
+                    // be considered "shallower" if the current location is in a
+                    // loop.
+                    old_nest.is_some()
+                };
+                // This will move all nodes that don't need to be in reduce loops
+                // outside of reduce loops. Nodes that do need to be in a reduce
+                // loop use the reduce node forming the loop, so the dominator chain
+                // will consist of one block, and this loop won't ever iterate.
+                let currently_at_join = function.nodes[location.idx()].is_join();
+                if shallower_nest || currently_at_join {
+                    location = control_node;
+                }
+            }
+
+            bbs[id.idx()] = Some(location);
+            num_skip_iters = 0;
+        } else {
+            // If there is no valid location for this node, then it's a reading
+            // node of a collection that can't be placed above a mutation that
+            // anti-depend uses it. Push the node back on the list, and we'll
+            // stop considering anti-dependencies soon. Don't immediately stop
+            // considering anti-dependencies, as we may be able to eke out some
+            // more use of them.
+            worklist.push_back(id);
+            num_skip_iters += 1;
+            continue;
+        }
+    }
+    let bbs: Vec<_> = bbs.into_iter().map(Option::unwrap).collect();
+    // Calculate the number of phis and reduces per basic block. We use this to
+    // emit phis and reduces at the top of basic blocks. We want to emit phis
+    // and reduces first into ordered basic blocks for two reasons:
+    // 1. This is useful for liveness analysis.
+    // 2. This is needed for some backends - LLVM expects phis to be at the top
+    //    of basic blocks.
+    let mut num_phis_reduces = vec![0; function.nodes.len()];
+    for (node_idx, bb) in bbs.iter().enumerate() {
+        let node = &function.nodes[node_idx];
+        if node.is_phi() || node.is_reduce() {
+            num_phis_reduces[bb.idx()] += 1;
+        }
+    }
+
+    // Step 5: determine the order of nodes inside each block. Use worklist to
+    // add nodes to blocks in order that obeys dependencies.
+    let mut order: Vec<Vec<NodeID>> = vec![vec![]; function.nodes.len()];
+    let mut worklist = VecDeque::from_iter(
+        reverse_postorder
+            .into_iter()
+            .filter(|id| !function.nodes[id.idx()].is_control()),
+    );
+    let mut visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
+    let mut num_skip_iters = 0;
+    let mut consider_antidependencies = true;
+    while let Some(id) = worklist.pop_front() {
+        // If the worklist isn't making progress, then there's at least one
+        // reading node of a collection that is in an anti-depend + normal depend
+        // use cycle with a mutating node. See above comment about anti-
+        // dependencies being optional; we just stop considering them here.
+        if num_skip_iters >= worklist.len() {
+            consider_antidependencies = false;
+        }
+
+        // Phis and reduces always get emitted. Other nodes need to obey
+        // dependency relationships and need to come after phis and reduces.
+        let node = &function.nodes[id.idx()];
+        let bb = bbs[id.idx()];
+        if node.is_phi()
+            || node.is_reduce()
+            || (num_phis_reduces[bb.idx()] == 0
+                && get_uses(node)
+                    .as_ref()
+                    .into_iter()
+                    .chain(if consider_antidependencies {
+                        Either::Left(antideps_uses[id.idx()].iter())
+                    } else {
+                        Either::Right(empty())
+                    })
+                    .all(|u| {
+                        function.nodes[u.idx()].is_control()
+                            || bbs[u.idx()] != bbs[id.idx()]
+                            || visited[u.idx()]
+                    }))
+        {
+            order[bb.idx()].push(*id);
+            visited.set(id.idx(), true);
+            num_skip_iters = 0;
+            if node.is_phi() || node.is_reduce() {
+                num_phis_reduces[bb.idx()] -= 1;
+            }
+        } else {
+            worklist.push_back(id);
+            num_skip_iters += 1;
+        }
+    }
+
+    (bbs, order)
+}
+
+fn terminating_reads<'a>(
+    function: &'a Function,
+    func_id: FunctionID,
+    reader: NodeID,
+    objects: &'a CollectionObjects,
+) -> Box<dyn Iterator<Item = NodeID> + 'a> {
+    match function.nodes[reader.idx()] {
+        Node::Read {
+            collect,
+            indices: _,
+        } if objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
+        Node::Write {
+            collect: _,
+            data,
+            indices: _,
+        } if !objects[&func_id].objects(data).is_empty() => Box::new(once(data)),
+        Node::Call {
+            control: _,
+            function: callee,
+            dynamic_constants: _,
+            ref args,
+        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
+            let objects = &objects[&callee];
+            let returns = objects.returned_objects();
+            let param_obj = objects.param_to_object(idx)?;
+            if !objects.is_mutated(param_obj) && !returns.contains(&param_obj) {
+                Some(*arg)
+            } else {
+                None
+            }
+        })),
+        _ => Box::new(empty()),
+    }
+}
+
+fn forwarding_reads<'a>(
+    function: &'a Function,
+    func_id: FunctionID,
+    reader: NodeID,
+    objects: &'a CollectionObjects,
+) -> Box<dyn Iterator<Item = NodeID> + 'a> {
+    match function.nodes[reader.idx()] {
+        Node::Read {
+            collect,
+            indices: _,
+        } if !objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
+        Node::Ternary {
+            op: TernaryOperator::Select,
+            first: _,
+            second,
+            third,
+        } if !objects[&func_id].objects(reader).is_empty() => {
+            Box::new(once(second).chain(once(third)))
+        }
+        Node::Call {
+            control: _,
+            function: callee,
+            dynamic_constants: _,
+            ref args,
+        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
+            let objects = &objects[&callee];
+            let returns = objects.returned_objects();
+            let param_obj = objects.param_to_object(idx)?;
+            if !objects.is_mutated(param_obj) && returns.contains(&param_obj) {
+                Some(*arg)
+            } else {
+                None
+            }
+        })),
+        _ => Box::new(empty()),
+    }
+}
+
+/*
+ * Iterate the collection objects that `mutator` may write to. Only two node
+ * kinds mutate collections: writes (through their `collect` input) and calls
+ * (through arguments passed to parameters that the callee itself mutates).
+ * Every other node yields nothing.
+ */
+fn mutating_objects<'a>(
+    function: &'a Function,
+    func_id: FunctionID,
+    mutator: NodeID,
+    objects: &'a CollectionObjects,
+) -> Box<dyn Iterator<Item = CollectionObjectID> + 'a> {
+    match function.nodes[mutator.idx()] {
+        Node::Write {
+            collect,
+            data: _,
+            indices: _,
+        } => {
+            // A write mutates whichever objects its `collect` input may be.
+            Box::new(objects[&func_id].objects(collect).into_iter().copied())
+        }
+        Node::Call {
+            control: _,
+            function: callee,
+            dynamic_constants: _,
+            ref args,
+        } => Box::new(
+            args.into_iter()
+                .enumerate()
+                .filter_map(move |(param_idx, arg)| {
+                    // Skip arguments whose parameter doesn't correspond to an
+                    // object, or whose object the callee never mutates.
+                    let callee_objects = &objects[&callee];
+                    let param_obj = callee_objects.param_to_object(param_idx)?;
+                    if !callee_objects.is_mutated(param_obj) {
+                        return None;
+                    }
+                    Some(objects[&func_id].objects(*arg).into_iter().copied())
+                })
+                .flatten(),
+        ),
+        _ => Box::new(empty()),
+    }
+}
+
+type Liveness = BTreeMap<NodeID, Vec<BTreeSet<NodeID>>>;
+
+/*
+ * Top level function to find implicit clones that need to be spilled. Returns
+ * whether a clone was spilled, in which case the whole scheduling process must
+ * be restarted.
+ */
+fn spill_clones(
+    editor: &mut FunctionEditor,
+    typing: &Vec<TypeID>,
+    control_subgraph: &Subgraph,
+    objects: &CollectionObjects,
+    bbs: &BasicBlocks,
+) -> bool {
+    // Step 1: compute a liveness analysis of collection values in the IR. This
+    // requires a dataflow analysis over the scheduled IR, which is not a common
+    // need in Hercules, so just hardcode the analysis.
+    let liveness = liveness_dataflow(
+        editor.func(),
+        editor.func_id(),
+        control_subgraph,
+        objects,
+        bbs,
+    );
+
+    // Step 2: compute an interference graph from the liveness result. This
+    // graph contains a vertex per node ID producing a collection value and an
+    // edge per pair of node IDs that interfere. Nodes A and B interfere if node
+    // A is defined right above a point where node B is live.
+    let mut edges = vec![];
+    for (bb, liveness) in liveness {
+        let insts = &bbs.1[bb.idx()];
+        // `liveness[pt]` is the set live immediately above instruction `pt`,
+        // so `skip(1)` pairs each instruction with the set live right *after*
+        // its definition point.
+        for (node, live) in zip(insts, liveness.into_iter().skip(1)) {
+            for live_node in live {
+                if *node != live_node {
+                    edges.push((*node, live_node));
+                }
+            }
+        }
+    }
+
+    // Step 3: filter edges (A, B) to just see edges where A uses B and A isn't
+    // a terminating read. These are the edges that may require a spill.
+    let mut spill_edges = edges.into_iter().filter(|(a, b)| {
+        get_uses(&editor.func().nodes[a.idx()])
+            .as_ref()
+            .into_iter()
+            .any(|u| *u == *b)
+            && !terminating_reads(editor.func(), editor.func_id(), *a, objects).any(|id| id == *b)
+    });
+
+    // Step 4: if there is a spill edge, spill it and return true. Otherwise,
+    // return false.
+    if let Some((user, obj)) = spill_edges.next() {
+        // Figure out the most immediate dominating region for every basic
+        // block. These are the points where spill slot phis get placed.
+        // `imm_dom_reg[n]` is the closest region at or above control node `n`:
+        // regions map to themselves, and every other control node inherits the
+        // value from its control predecessor.
+        let nodes = &editor.func().nodes;
+        let mut imm_dom_reg = vec![NodeID::new(0); editor.func().nodes.len()];
+        for (idx, node) in nodes.into_iter().enumerate() {
+            if node.is_region() {
+                imm_dom_reg[idx] = NodeID::new(idx);
+            }
+        }
+        let rev_po = control_subgraph.rev_po(NodeID::new(0));
+        for bb in rev_po.iter() {
+            if !nodes[bb.idx()].is_region() && !nodes[bb.idx()].is_start() {
+                // Non-region, non-start control nodes have a single control
+                // predecessor, so inherit its region.
+                imm_dom_reg[bb.idx()] =
+                    imm_dom_reg[control_subgraph.preds(*bb).next().unwrap().idx()];
+            }
+        }
+
+        let other_obj_users: Vec<_> = editor.get_users(obj).filter(|id| *id != user).collect();
+        let mut dummy_phis = vec![NodeID::new(0); imm_dom_reg.len()];
+        let mut success = editor.edit(|mut edit| {
+            // Construct the spill slot. This is just a constant that gets phi-
+            // ed throughout the entire function.
+            let cons_id = edit.add_zero_constant(typing[obj.idx()]);
+            let slot_id = edit.add_node(Node::Constant { id: cons_id });
+
+            // Allocate IDs for phis that move the spill slot throughout the
+            // function without implicit clones. These are dummy phis, since
+            // there are potentially cycles between them. We will replace them
+            // later.
+            for (idx, reg) in imm_dom_reg.iter().enumerate().skip(1) {
+                if idx == reg.idx() {
+                    dummy_phis[idx] = edit.add_node(Node::Phi {
+                        control: *reg,
+                        data: empty().collect(),
+                    });
+                }
+            }
+
+            // Spill `obj` before `user` potentially modifies it.
+            let spill_region = imm_dom_reg[bbs.0[obj.idx()].idx()];
+            let spill_id = edit.add_node(Node::Write {
+                // In the start block (region 0) no phi carries the slot (the
+                // dummy phi loop skips index 0), so use the slot constant
+                // directly.
+                collect: if spill_region == NodeID::new(0) {
+                    slot_id
+                } else {
+                    dummy_phis[spill_region.idx()]
+                },
+                data: obj,
+                indices: empty().collect(),
+            });
+
+            // Before each other user, unspill `obj`.
+            for other_user in other_obj_users {
+                let other_region = imm_dom_reg[bbs.0[other_user.idx()].idx()];
+                // If this assert fails, then `obj` is not in the first basic
+                // block, but it has a user that is in the first basic block,
+                // which violates SSA.
+                assert!(other_region == spill_region || other_region != NodeID::new(0));
+
+                // If an other user is a phi, we need to be a little careful
+                // about how we insert unspilling code for `obj`. Instead of
+                // inserting an unspill in the same block as the user, we need
+                // to insert one in each predecessor of the phi that corresponds
+                // to a use of `obj`. Since this requires modifying individual
+                // uses in a phi, just rebuild the node entirely.
+                if let Node::Phi { control, data } = edit.get_node(other_user).clone() {
+                    assert_eq!(control, other_region);
+                    let mut new_data = vec![];
+                    for (pred, data) in zip(control_subgraph.preds(control), data) {
+                        let pred = imm_dom_reg[pred.idx()];
+                        if data == obj {
+                            // Unspill from the spilled copy when coming from
+                            // the spilling region, otherwise from that
+                            // predecessor's slot phi.
+                            let unspill_id = edit.add_node(Node::Write {
+                                collect: obj,
+                                data: if pred == spill_region {
+                                    spill_id
+                                } else {
+                                    dummy_phis[pred.idx()]
+                                },
+                                indices: empty().collect(),
+                            });
+                            new_data.push(unspill_id);
+                        } else {
+                            new_data.push(data);
+                        }
+                    }
+                    let new_phi = edit.add_node(Node::Phi {
+                        control,
+                        data: new_data.into_boxed_slice(),
+                    });
+                    edit = edit.replace_all_uses(other_user, new_phi)?;
+                    edit = edit.delete_node(other_user)?;
+                } else {
+                    let unspill_id = edit.add_node(Node::Write {
+                        collect: obj,
+                        data: if other_region == spill_region {
+                            spill_id
+                        } else {
+                            dummy_phis[other_region.idx()]
+                        },
+                        indices: empty().collect(),
+                    });
+                    edit = edit.replace_all_uses_where(obj, unspill_id, |id| *id == other_user)?;
+                }
+            }
+
+            // Create and hook up all the real phis. Phi elimination will clean
+            // this up.
+            let mut real_phis = vec![NodeID::new(0); imm_dom_reg.len()];
+            for (idx, reg) in imm_dom_reg.iter().enumerate().skip(1) {
+                if idx == reg.idx() {
+                    real_phis[idx] = edit.add_node(Node::Phi {
+                        control: *reg,
+                        data: control_subgraph
+                            .preds(*reg)
+                            .map(|pred| {
+                                let pred = imm_dom_reg[pred.idx()];
+                                if pred == spill_region {
+                                    spill_id
+                                } else if pred == NodeID::new(0) {
+                                    slot_id
+                                } else {
+                                    dummy_phis[pred.idx()]
+                                }
+                            })
+                            .collect(),
+                    });
+                }
+            }
+            for (dummy, real) in zip(dummy_phis.iter(), real_phis) {
+                if *dummy != real {
+                    edit = edit.replace_all_uses(*dummy, real)?;
+                }
+            }
+
+            Ok(edit)
+        });
+        // The dummy phis had all their uses replaced by real phis in the first
+        // edit, so they can only be deleted in this second, separate edit.
+        success = success
+            && editor.edit(|mut edit| {
+                for dummy in dummy_phis {
+                    if dummy != NodeID::new(0) {
+                        edit = edit.delete_node(dummy)?;
+                    }
+                }
+                Ok(edit)
+            });
+        assert!(success, "PANIC: GCM cannot fail to edit a function, as it needs to legalize the reference semantics of every function before code generation.");
+        true
+    } else {
+        false
+    }
+}
+
+/*
+ * Liveness dataflow analysis on scheduled Hercules IR. Just look at nodes that
+ * involve collections.
+ */
+/*
+ * Backward dataflow: iterate to a fixpoint over blocks in postorder. Returns,
+ * per block, one live set per program point (instructions + 1 points; see the
+ * `Liveness` alias).
+ */
+fn liveness_dataflow(
+    function: &Function,
+    func_id: FunctionID,
+    control_subgraph: &Subgraph,
+    objects: &CollectionObjects,
+    bbs: &BasicBlocks,
+) -> Liveness {
+    // Liveness flows backwards, so visit blocks in postorder.
+    let mut po = control_subgraph.rev_po(NodeID::new(0));
+    po.reverse();
+    let mut liveness = Liveness::default();
+    for (bb_idx, insts) in bbs.1.iter().enumerate() {
+        liveness.insert(NodeID::new(bb_idx), vec![BTreeSet::new(); insts.len() + 1]);
+    }
+    // `num_phis_reduces[bb]` counts the phis and reduces scheduled in `bb`;
+    // `reducing[bb]` records whether those are reduces.
+    let mut num_phis_reduces = vec![0; function.nodes.len()];
+    let mut reducing = vec![false; function.nodes.len()];
+    for (node_idx, bb) in bbs.0.iter().enumerate() {
+        let node = &function.nodes[node_idx];
+        if node.is_phi() || node.is_reduce() {
+            num_phis_reduces[bb.idx()] += 1;
+            // Phis and reduces can't be in the same basic block.
+            if node.is_reduce() {
+                // The count was incremented above, so this is the block's
+                // first phi-or-reduce exactly when the count is now 1; any
+                // later reduce requires the block to already be a reducing
+                // block. (`== 0` is unsatisfiable after the increment and
+                // would fire on the first reduce of every reducing block.)
+                assert!(num_phis_reduces[bb.idx()] == 1 || reducing[bb.idx()]);
+                reducing[bb.idx()] = true;
+            } else {
+                assert!(!reducing[bb.idx()]);
+            }
+        }
+    }
+    let is_obj = |id: NodeID| !objects[&func_id].objects(id).is_empty();
+
+    loop {
+        let mut changed = false;
+
+        for bb in po.iter() {
+            // First, calculate the liveness set for the bottom of this block.
+            let last_pt = bbs.1[bb.idx()].len();
+            let old_value = &liveness[&bb][last_pt];
+            let mut new_value = BTreeSet::new();
+            // A reducing block is its own successor through the reduction
+            // cycle, so chain it onto its control successors.
+            for succ in control_subgraph.succs(*bb).chain(if reducing[bb.idx()] {
+                Either::Left(once(*bb))
+            } else {
+                Either::Right(empty())
+            }) {
+                // The liveness at the bottom of a basic block is the union of:
+                // 1. The liveness of each succecessor right after its phis and
+                //    reduces.
+                // 2. Every data use in a phi or reduce that corresponds to this
+                //    block as the predecessor.
+                let after_phis_reduces_pt = num_phis_reduces[succ.idx()];
+                new_value.extend(&liveness[&succ][after_phis_reduces_pt]);
+                for inst_idx in 0..after_phis_reduces_pt {
+                    let id = bbs.1[succ.idx()][inst_idx];
+                    new_value.remove(&id);
+                    match function.nodes[id.idx()] {
+                        Node::Phi { control, ref data } if is_obj(data[0]) => {
+                            assert_eq!(control, succ);
+                            new_value.extend(
+                                zip(control_subgraph.preds(succ), data)
+                                    .filter(|(pred, _)| *pred == *bb)
+                                    .map(|(_, data)| *data),
+                            );
+                        }
+                        Node::Reduce {
+                            control,
+                            init,
+                            reduct,
+                        } if is_obj(init) => {
+                            assert_eq!(control, succ);
+                            // On the back edge (succ == bb) the `reduct` input
+                            // is live; on entry to the loop, `init` is.
+                            if succ == *bb {
+                                new_value.insert(reduct);
+                            } else {
+                                new_value.insert(init);
+                            }
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            changed |= *old_value != new_value;
+            liveness.get_mut(&bb).unwrap()[last_pt] = new_value;
+
+            // Second, calculate the liveness set above each instruction in this block.
+            for pt in (0..last_pt).rev() {
+                let old_value = &liveness[&bb][pt];
+                let mut new_value = liveness[&bb][pt + 1].clone();
+                let id = bbs.1[bb.idx()][pt];
+                let uses = get_uses(&function.nodes[id.idx()]);
+                new_value.remove(&id);
+                new_value.extend(
+                    if let Node::Write {
+                        collect: _,
+                        data,
+                        ref indices,
+                    } = function.nodes[id.idx()]
+                        && indices.is_empty()
+                    {
+                        // If this write is a cloning write, the `collect` input
+                        // isn't actually live, because its value doesn't
+                        // matter.
+                        Either::Left(once(data).filter(|id| is_obj(*id)))
+                    } else {
+                        Either::Right(
+                            uses.as_ref()
+                                .into_iter()
+                                .map(|id| *id)
+                                .filter(|id| is_obj(*id)),
+                        )
+                    },
+                );
+                changed |= *old_value != new_value;
+                liveness.get_mut(&bb).unwrap()[pt] = new_value;
+            }
+        }
+
+        if !changed {
+            return liveness;
+        }
+    }
+}
diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs
deleted file mode 100644
index 5db49ec467977076ca1d82f933e23020f18ce2f4..0000000000000000000000000000000000000000
--- a/hercules_opt/src/legalize_reference_semantics.rs
+++ /dev/null
@@ -1,1236 +0,0 @@
-extern crate bitvec;
-extern crate hercules_cg;
-extern crate hercules_ir;
-
-use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
-use std::iter::{empty, once, zip, FromIterator};
-
-use self::bitvec::prelude::*;
-
-use self::hercules_cg::*;
-use self::hercules_ir::*;
-
-use crate::*;
-
-/*
- * Top level function to legalize the reference semantics of a Hercules IR
- * function. Hercules IR is a value semantics representation, meaning that all
- * program state is in the form of copyable values, and mutation takes place by
- * making a new value that is a copy of the old value with some modification.
- * This representation is extremely convenient for optimization, but is not good
- * for code generation, where we need to generate code with references to get
- * good performance. Hercules IR can alternatively be interpreted using
- * reference semantics, where pointers to collection objects are passed around,
- * read from, and written to. However, the value semantics and reference
- * semantics interpretation of a Hercules IR function may not be equal - this
- * pass transforms a Hercules IR function such that its new value semantics is
- * the same as its old value semantics and that its new reference semantics is
- * the same as its new value semantics. This pass returns a placement of nodes
- * into ordered basic blocks, since the reference semantics of a function
- * depends on the order of execution with respect to anti-dependencies. Clones
- * are inserted sparingly when there are two write users of a single collection
- * or if a read user cannot be scheduled before a write user.
- */
-pub fn legalize_reference_semantics(
-    editor: &mut FunctionEditor,
-    def_use: &ImmutableDefUseMap,
-    reverse_postorder: &Vec<NodeID>,
-    typing: &Vec<TypeID>,
-    control_subgraph: &Subgraph,
-    dom: &DomTree,
-    fork_join_map: &HashMap<NodeID, NodeID>,
-    loops: &LoopTree,
-    objects: &CollectionObjects,
-) -> Option<BasicBlocks> {
-    // Repeatedly try to place nodes into basic blocks. If clones are induced,
-    // re-try. Specifically, repeat the following procedure until no new clones:
-    //
-    // 1. Attempt to place nodes in basic blocks. If a node can't be placed due
-    //    to anti-dependency edges, induce a clone on the read and go back to
-    //    step 1.
-    // 2. Check for any write-induced clones. If there are any, go back to step
-    //    1.
-    //
-    // Since each analysis needs to be re-calculated in each iteration, this
-    // function just implements the body of the described loop. The re-try logic
-    // is found in pass.rs. When a re-try is needed, no basic block assignment
-    // is returned. When a re-try isn't needed (no new clones were found), a
-    // basic block assignment is returned.
-    let bbs = match basic_blocks(
-        editor.func(),
-        editor.func_id(),
-        def_use,
-        reverse_postorder,
-        control_subgraph,
-        dom,
-        loops,
-        fork_join_map,
-        objects,
-    ) {
-        Ok(bbs) => bbs,
-        Err((obj, reader)) => {
-            todo!();
-            return None;
-        }
-    };
-    if materialize_clones(editor, typing, control_subgraph, dom, loops, objects, &bbs) {
-        None
-    } else {
-        Some(bbs)
-    }
-}
-
-/*
- * Top level global code motion function. Assigns each data node to one of its
- * immediate control use / user nodes, forming (unordered) basic blocks. Returns
- * the control node / basic block each node is in. Takes in a partial
- * partitioning that must be respected. Based on the schedule-early-schedule-
- * late method from Cliff Click's PhD thesis. May fail if an anti-dependency
- * edge can't be satisfied - in this case, a clone that has to be induced is
- * returned instead.
- */
-fn basic_blocks(
-    function: &Function,
-    func_id: FunctionID,
-    def_use: &ImmutableDefUseMap,
-    reverse_postorder: &Vec<NodeID>,
-    control_subgraph: &Subgraph,
-    dom: &DomTree,
-    loops: &LoopTree,
-    fork_join_map: &HashMap<NodeID, NodeID>,
-    objects: &CollectionObjects,
-) -> Result<BasicBlocks, (NodeID, NodeID)> {
-    let mut bbs: Vec<Option<NodeID>> = vec![None; function.nodes.len()];
-
-    // Step 1: assign the basic block locations of all nodes that must be in a
-    // specific block. This includes control nodes as well as some special data
-    // nodes, such as phis.
-    for idx in 0..function.nodes.len() {
-        match function.nodes[idx] {
-            Node::Phi { control, data: _ } => bbs[idx] = Some(control),
-            Node::ThreadID {
-                control,
-                dimension: _,
-            } => bbs[idx] = Some(control),
-            Node::Reduce {
-                control,
-                init: _,
-                reduct: _,
-            } => bbs[idx] = Some(control),
-            Node::Call {
-                control,
-                function: _,
-                dynamic_constants: _,
-                args: _,
-            } => bbs[idx] = Some(control),
-            Node::Parameter { index: _ } => bbs[idx] = Some(NodeID::new(0)),
-            Node::Constant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
-            Node::DynamicConstant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
-            _ if function.nodes[idx].is_control() => bbs[idx] = Some(NodeID::new(idx)),
-            _ => {}
-        }
-    }
-
-    // Step 2: schedule early. Place nodes in the earliest position they could
-    // go - use worklist to iterate nodes.
-    let mut schedule_early = bbs.clone();
-    let mut worklist = VecDeque::from(reverse_postorder.clone());
-    while let Some(id) = worklist.pop_front() {
-        if schedule_early[id.idx()].is_some() {
-            continue;
-        }
-
-        // For every use, check what block is its "schedule early" block. This
-        // node goes in the lowest block amongst those blocks.
-        let use_places: Option<Vec<NodeID>> = get_uses(&function.nodes[id.idx()])
-            .as_ref()
-            .into_iter()
-            .map(|id| *id)
-            .map(|id| schedule_early[id.idx()])
-            .collect();
-        if let Some(use_places) = use_places {
-            // If every use has been placed, we can place this node as the
-            // lowest place in the domtree that dominates all of the use places.
-            let lowest = dom.lowest_amongst(use_places.into_iter());
-            schedule_early[id.idx()] = Some(lowest);
-        } else {
-            // If not, then just push this node back on the worklist.
-            worklist.push_back(id);
-        }
-    }
-
-    // Step 3: find anti-dependence edges. An anti-dependence edge needs to be
-    // drawn between a collection reading node and a collection mutating node
-    // when the following conditions are true:
-    //
-    // 1: The reading and mutating nodes may involve the same collection.
-    // 2: The node producing the collection used by the reading node is in a
-    //    schedule early block that dominates the schedule early block of the
-    //    mutating node. The node producing the collection used by the reading
-    //    node may be an originator of a collection, phi or reduce, or mutator,
-    //    but not forwarding read - forwarding reads are collapsed, and the
-    //    bottom read is treated as reading from the transitive parent of the
-    //    forwarding read(s).
-    let mut antideps = BTreeSet::new();
-    for id in reverse_postorder.iter() {
-        // Find a terminating read node and the collections it reads.
-        let terminating_reads: BTreeSet<_> =
-            terminating_reads(function, func_id, *id, objects).collect();
-        if !terminating_reads.is_empty() {
-            // Walk forwarding reads to find anti-dependency roots.
-            let mut workset = terminating_reads.clone();
-            let mut roots = BTreeSet::new();
-            while let Some(pop) = workset.pop_first() {
-                let forwarded: BTreeSet<_> =
-                    forwarding_reads(function, func_id, pop, objects).collect();
-                if forwarded.is_empty() {
-                    roots.insert(pop);
-                } else {
-                    workset.extend(forwarded);
-                }
-            }
-
-            // For each root, find mutating nodes dominated by the root that
-            // modify an object read on any input of the current node (the
-            // terminating read).
-            // TODO: make this less outrageously inefficient.
-            let func_objects = &objects[&func_id];
-            for root in roots.iter() {
-                let root_early = schedule_early[root.idx()].unwrap();
-                let mut root_block_iterated_users: BTreeSet<NodeID> = BTreeSet::new();
-                let mut workset = BTreeSet::new();
-                workset.insert(*root);
-                while let Some(pop) = workset.pop_first() {
-                    let users = def_use.get_users(pop).into_iter().filter(|user| {
-                        !function.nodes[user.idx()].is_phi()
-                            && !function.nodes[user.idx()].is_reduce()
-                            && schedule_early[user.idx()].unwrap() == root_early
-                    });
-                    workset.extend(users.clone());
-                    root_block_iterated_users.extend(users);
-                }
-                let read_objs: BTreeSet<_> = terminating_reads
-                    .iter()
-                    .map(|read_use| func_objects.objects(*read_use).into_iter())
-                    .flatten()
-                    .map(|id| *id)
-                    .collect();
-                for mutator in reverse_postorder.iter() {
-                    let mutator_early = schedule_early[mutator.idx()].unwrap();
-                    if dom.does_dom(root_early, mutator_early)
-                        && (root_early != mutator_early
-                            || root_block_iterated_users.contains(&mutator))
-                        && mutating_objects(function, func_id, *mutator, objects)
-                            .any(|mutated| read_objs.contains(&mutated))
-                        && id != mutator
-                    {
-                        antideps.insert((*id, *mutator));
-                    }
-                }
-            }
-        }
-    }
-    let mut antideps_uses = vec![vec![]; function.nodes.len()];
-    let mut antideps_users = vec![vec![]; function.nodes.len()];
-    for (reader, mutator) in antideps.iter() {
-        antideps_uses[mutator.idx()].push(*reader);
-        antideps_users[reader.idx()].push(*mutator);
-    }
-
-    // Step 4: schedule late and pick each nodes final position. Since the late
-    // schedule of each node depends on the final positions of its users, these
-    // two steps must be fused. Compute their latest position, then use the
-    // control dependent + shallow loop heuristic to actually place them.
-    let join_fork_map: HashMap<NodeID, NodeID> = fork_join_map
-        .into_iter()
-        .map(|(fork, join)| (*join, *fork))
-        .collect();
-    let mut worklist = VecDeque::from_iter(reverse_postorder.into_iter().map(|id| *id).rev());
-    while let Some(id) = worklist.pop_front() {
-        if bbs[id.idx()].is_some() {
-            continue;
-        }
-
-        // Calculate the least common ancestor of user blocks, a.k.a. the "late"
-        // schedule.
-        let calculate_lca = || -> Option<_> {
-            let mut lca = None;
-            // Helper to incrementally update the LCA.
-            let mut update_lca = |a| {
-                if let Some(acc) = lca {
-                    lca = Some(dom.least_common_ancestor(acc, a));
-                } else {
-                    lca = Some(a);
-                }
-            };
-
-            // For every user, consider where we need to be to directly dominate the
-            // user.
-            for user in def_use
-                .get_users(id)
-                .as_ref()
-                .into_iter()
-                .chain(antideps_users[id.idx()].iter())
-                .map(|id| *id)
-            {
-                if let Node::Phi { control, data } = &function.nodes[user.idx()] {
-                    // For phis, we need to dominate the block jumping to the phi in
-                    // the slot that corresponds to our use.
-                    for (control, data) in
-                        zip(get_uses(&function.nodes[control.idx()]).as_ref(), data)
-                    {
-                        if id == *data {
-                            update_lca(*control);
-                        }
-                    }
-                } else if let Node::Reduce {
-                    control,
-                    init,
-                    reduct,
-                } = &function.nodes[user.idx()]
-                {
-                    // For reduces, we need to either dominate the block right
-                    // before the fork if we're the init input, or we need to
-                    // dominate the join if we're the reduct input.
-                    if id == *init {
-                        let before_fork = function.nodes[join_fork_map[control].idx()]
-                            .try_fork()
-                            .unwrap()
-                            .0;
-                        update_lca(before_fork);
-                    } else {
-                        assert_eq!(id, *reduct);
-                        update_lca(*control);
-                    }
-                } else {
-                    // For everything else, we just need to dominate the user.
-                    update_lca(bbs[user.idx()]?);
-                }
-            }
-
-            Some(lca)
-        };
-
-        // Check if all users have been placed. If one of them hasn't, then add
-        // this node back on to the worklist.
-        let Some(lca) = calculate_lca() else {
-            worklist.push_back(id);
-            continue;
-        };
-
-        // Look between the LCA and the schedule early location to place the
-        // node.
-        let schedule_early = schedule_early[id.idx()].unwrap();
-        let schedule_late = lca.unwrap_or(schedule_early);
-        let mut chain = dom
-            // If the node has no users, then it doesn't really matter where we
-            // place it - just place it at the early placement.
-            .chain(schedule_late, schedule_early);
-
-        if let Some(mut location) = chain.next() {
-            /*
-            while let Some(control_node) = chain.next() {
-                // If the next node further up the dominator tree is in a shallower
-                // loop nest or if we can get out of a reduce loop when we don't
-                // need to be in one, place this data node in a higher-up location.
-                let old_nest = loops
-                    .header_of(location)
-                    .map(|header| loops.nesting(header).unwrap());
-                let new_nest = loops
-                    .header_of(control_node)
-                    .map(|header| loops.nesting(header).unwrap());
-                let shallower_nest = if let (Some(old_nest), Some(new_nest)) = (old_nest, new_nest)
-                {
-                    old_nest > new_nest
-                } else {
-                    // If the new location isn't a loop, it's nesting level should
-                    // be considered "shallower" if the current location is in a
-                    // loop.
-                    old_nest.is_some()
-                };
-                // This will move all nodes that don't need to be in reduce loops
-                // outside of reduce loops. Nodes that do need to be in a reduce
-                // loop use the reduce node forming the loop, so the dominator chain
-                // will consist of one block, and this loop won't ever iterate.
-                let currently_at_join = function.nodes[location.idx()].is_join();
-                if shallower_nest || currently_at_join {
-                    location = control_node;
-                }
-            }
-            */
-
-            bbs[id.idx()] = Some(location);
-        } else {
-            // If there is no valid location for this node, then it's a reading
-            // node of a collection that can't be placed above a mutation that
-            // anti-depend uses it. Thus, a clone needs to be induced.
-            todo!()
-        }
-    }
-    let bbs: Vec<_> = bbs.into_iter().map(Option::unwrap).collect();
-
-    // Step 5: determine the order of nodes inside each block. Use worklist to
-    // add nodes to blocks in order that obeys dependencies.
-    let mut order: Vec<Vec<NodeID>> = vec![vec![]; function.nodes.len()];
-    let mut worklist = VecDeque::from_iter(
-        reverse_postorder
-            .into_iter()
-            .filter(|id| !function.nodes[id.idx()].is_control()),
-    );
-    let mut visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
-    let mut no_change_iters = 0;
-    while no_change_iters <= worklist.len()
-        && let Some(id) = worklist.pop_front()
-    {
-        let node = &function.nodes[id.idx()];
-        if node.is_phi()
-            || node.is_reduce()
-            || get_uses(node)
-                .as_ref()
-                .into_iter()
-                .chain(antideps_uses[id.idx()].iter())
-                .all(|u| {
-                    function.nodes[u.idx()].is_control()
-                        || bbs[u.idx()] != bbs[id.idx()]
-                        || visited[u.idx()]
-                })
-        {
-            order[bbs[id.idx()].idx()].push(*id);
-            visited.set(id.idx(), true);
-            no_change_iters = 0;
-        } else {
-            worklist.push_back(id);
-            no_change_iters += 1;
-        }
-    }
-
-    if no_change_iters == 0 {
-        Ok((bbs, order))
-    } else {
-        // If the worklist exited without finishing, then there's at least one
-        // reading node of a collection that is in a anti-depend + normal depend
-        // use cycle with a mutating node. This cycle must be broken by inducing
-        // a clone.
-        todo!()
-    }
-}
-
-fn terminating_reads<'a>(
-    function: &'a Function,
-    func_id: FunctionID,
-    reader: NodeID,
-    objects: &'a CollectionObjects,
-) -> Box<dyn Iterator<Item = NodeID> + 'a> {
-    match function.nodes[reader.idx()] {
-        Node::Read {
-            collect,
-            indices: _,
-        } if objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
-        Node::Write {
-            collect: _,
-            data,
-            indices: _,
-        } if !objects[&func_id].objects(data).is_empty() => Box::new(once(data)),
-        Node::Call {
-            control: _,
-            function: callee,
-            dynamic_constants: _,
-            ref args,
-        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
-            let objects = &objects[&callee];
-            let returns = objects.returned_objects();
-            let param_obj = objects.param_to_object(idx)?;
-            if !objects.is_mutated(param_obj) && !returns.contains(&param_obj) {
-                Some(*arg)
-            } else {
-                None
-            }
-        })),
-        _ => Box::new(empty()),
-    }
-}
-
-fn forwarding_reads<'a>(
-    function: &'a Function,
-    func_id: FunctionID,
-    reader: NodeID,
-    objects: &'a CollectionObjects,
-) -> Box<dyn Iterator<Item = NodeID> + 'a> {
-    match function.nodes[reader.idx()] {
-        Node::Read {
-            collect,
-            indices: _,
-        } if !objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
-        Node::Ternary {
-            op: TernaryOperator::Select,
-            first: _,
-            second,
-            third,
-        } if !objects[&func_id].objects(reader).is_empty() => {
-            Box::new(once(second).chain(once(third)))
-        }
-        Node::Call {
-            control: _,
-            function: callee,
-            dynamic_constants: _,
-            ref args,
-        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
-            let objects = &objects[&callee];
-            let returns = objects.returned_objects();
-            let param_obj = objects.param_to_object(idx)?;
-            if !objects.is_mutated(param_obj) && returns.contains(&param_obj) {
-                Some(*arg)
-            } else {
-                None
-            }
-        })),
-        _ => Box::new(empty()),
-    }
-}
-
-fn mutating_objects<'a>(
-    function: &'a Function,
-    func_id: FunctionID,
-    mutator: NodeID,
-    objects: &'a CollectionObjects,
-) -> Box<dyn Iterator<Item = CollectionObjectID> + 'a> {
-    match function.nodes[mutator.idx()] {
-        Node::Write {
-            collect,
-            data: _,
-            indices: _,
-        } => Box::new(objects[&func_id].objects(collect).into_iter().map(|id| *id)),
-        Node::Call {
-            control: _,
-            function: callee,
-            dynamic_constants: _,
-            ref args,
-        } => Box::new(
-            args.into_iter()
-                .enumerate()
-                .filter_map(move |(idx, arg)| {
-                    let callee_objects = &objects[&callee];
-                    let param_obj = callee_objects.param_to_object(idx)?;
-                    if callee_objects.is_mutated(param_obj) {
-                        Some(objects[&func_id].objects(*arg).into_iter().map(|id| *id))
-                    } else {
-                        None
-                    }
-                })
-                .flatten(),
-        ),
-        _ => Box::new(empty()),
-    }
-}
-
-/*
- * Top level function to materialize clones of collections. This transformation
- * eliminates the possibility of multiple independent writes (including dynamic
- * writes) to a single collection by introducing extra collection constants and
- * inserting explicit clones. This allows us to make the simplifying assumption
- * in the backend that collections have reference, rather than value, semantics.
- * The pass calling this function is mandatory for correctness.
- */
-fn materialize_clones(
-    editor: &mut FunctionEditor,
-    typing: &Vec<TypeID>,
-    control_subgraph: &Subgraph,
-    dom: &DomTree,
-    loops: &LoopTree,
-    objects: &CollectionObjects,
-    bbs: &BasicBlocks,
-) -> bool {
-    let rev_po = control_subgraph.rev_po(NodeID::new(0));
-    let mut total_num_pts = 0;
-    let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
-    for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
-        if idx == bb.idx() {
-            bb_to_prefix_sum[idx] = total_num_pts;
-            total_num_pts += insts.len() + 1;
-        }
-    }
-
-    // Calculate two lattices - one that includes back edges, and one that
-    // doesn't. We want to handle simple clones before loop induced clones, so
-    // we first materialize clones based on the no-back-edges lattice, and hten
-    // based on the full lattice.
-    let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
-        vec![BTreeMap::new(); total_num_pts];
-    used_collections_dataflow(
-        editor,
-        &mut no_back_edge_lattice,
-        &rev_po,
-        &bb_to_prefix_sum,
-        control_subgraph,
-        objects,
-        bbs,
-    );
-    let mut super_value = BTreeMap::new();
-    if find_clones(
-        editor,
-        &super_value,
-        &no_back_edge_lattice,
-        &rev_po,
-        &typing,
-        control_subgraph,
-        dom,
-        loops,
-        objects,
-        &bb_to_prefix_sum,
-        bbs,
-    ) {
-        return true;
-    }
-
-    // After inducing simple clones, calculate the full lattice and materialize
-    // any loop induced clones.
-    let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
-    loop {
-        let changed = used_collections_dataflow(
-            editor,
-            &mut lattice,
-            &rev_po,
-            &bb_to_prefix_sum,
-            control_subgraph,
-            objects,
-            bbs,
-        );
-        if !changed {
-            break;
-        }
-    }
-    for value in lattice.iter() {
-        meet(&mut super_value, value);
-    }
-    find_clones(
-        editor,
-        &super_value,
-        &lattice,
-        &rev_po,
-        &typing,
-        control_subgraph,
-        dom,
-        loops,
-        objects,
-        &bb_to_prefix_sum,
-        bbs,
-    )
-}
-
-fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
-    for (used, users) in right.into_iter() {
-        left.entry(*used).or_default().extend(users.into_iter());
-    }
-}
-
-/*
- * Helper function to run a single iteration of the used collections dataflow
- * analysis. Returns whether the lattice was changed. The lattice maps each
- * program point to a set of used values and their possible users. Top is that
- * no nodes are used yet.
- */
-fn used_collections_dataflow(
-    editor: &FunctionEditor,
-    lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
-    rev_po: &Vec<NodeID>,
-    bb_to_prefix_sum: &Vec<usize>,
-    control_subgraph: &Subgraph,
-    objects: &CollectionObjects,
-    bbs: &BasicBlocks,
-) -> bool {
-    // Run dataflow analysis to figure out which accesses to collections induce
-    // clones. This dataflow analysis depends on basic block assignments and is
-    // more analogous to standard dataflow analysis in CFG + SSA IRs. This is
-    // the only place this form is used, so just hardcode it here.
-    //
-    // This forward dataflow analysis tracks which collections are used at each
-    // program point, and by what user nodes. Collections are referred to using
-    // node IDs. Specifically:
-    //
-    // - Phi - a phi node adds its inputs to the used set and removes itself
-    //   from the used set. If a phi uses an ID that is used along the edge of
-    //   the corresponding predecessor, a clone is induced.
-    // - Select - a select node adds its inputs to the used set and removes
-    //   itself from the used set. If either use is already used, a clone is
-    //   induced.
-    // - Reduce - a reduce node adds its inputs to the used set and removes
-    //   itself from the used set. If the `init` input is already used, a clone
-    //   is induced. If the `reduct` input is used at the end of the basic block
-    //   containing the reduce, then a clone is induced. At the end of the basic
-    //   block, the reduce removes itself from the used set.
-    // - Read - a read node that reads a sub-collections from a collection,
-    //   rather than reading a primitive type, adds its input to the used set
-    //   and removes itself from the used set. If the `collect` input is already
-    //   used, a clone is induced.
-    // - Write - a write node adds its `collect` input to the used set and
-    //   removes itself from the used set. If the `collect` input is already
-    //   used, a clone is induced.
-    // - Call - a call node adds any mutated input or input that may be returned
-    //   to the used set and removes itself from the used set. If any mutated
-    //   input is already used, a clone is induced.
-    //
-    // Reads of sub-collections (select, some read, and some call nodes) use a
-    // collection because they may have downstream writes that depend on the new
-    // "sub-view" of the same collection. This does not include reads that "end"
-    // (most reads, some calls, the `data` input of a write). This analysis does
-    // not consider parallel mutations in fork-joins.
-    let nodes = &editor.func().nodes;
-    let func_id = editor.func_id();
-    let mut changed = false;
-
-    for bb in rev_po.iter() {
-        // The lattice value of the first point is the meet of the
-        // predecessor terminating lattice values.
-        let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
-        let mut new_top_value = BTreeMap::new();
-        // Clearing `top_value` is not necessary since used nodes are never
-        // removed from lattice values, only added.
-        for pred in control_subgraph.preds(*bb) {
-            let last_pt = bbs.1[pred.idx()].len();
-            meet(
-                &mut new_top_value,
-                &lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
-            );
-        }
-        changed |= *old_top_value != new_top_value;
-        lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
-
-        // The lattice value of following points are determined by their
-        // immediate preceding instructions.
-        let insts = &bbs.1[bb.idx()];
-        for (prev_pt, inst) in insts.iter().enumerate() {
-            let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
-            let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
-            let mut new_value = prev_value.clone();
-            match nodes[inst.idx()] {
-                Node::Phi {
-                    control: _,
-                    ref data,
-                } if !objects[&func_id].objects(*inst).is_empty() => {
-                    for elem in data {
-                        new_value.entry(*elem).or_default().insert(*inst);
-                    }
-                    new_value.remove(inst);
-                }
-                Node::Ternary {
-                    op: TernaryOperator::Select,
-                    first: _,
-                    second,
-                    third,
-                }
-                | Node::Reduce {
-                    control: _,
-                    init: second,
-                    reduct: third,
-                } => {
-                    if !objects[&func_id].objects(*inst).is_empty() {
-                        new_value.entry(second).or_default().insert(*inst);
-                        new_value.entry(third).or_default().insert(*inst);
-                        new_value.remove(inst);
-                    }
-                }
-                Node::Read {
-                    collect,
-                    indices: _,
-                } if !objects[&func_id].objects(*inst).is_empty() => {
-                    new_value.entry(collect).or_default().insert(*inst);
-                    new_value.remove(inst);
-                }
-                Node::Write {
-                    collect,
-                    data: _,
-                    indices: _,
-                } => {
-                    new_value.entry(collect).or_default().insert(*inst);
-                    new_value.remove(inst);
-                }
-                Node::Call {
-                    control: _,
-                    function: callee,
-                    dynamic_constants: _,
-                    ref args,
-                } => {
-                    let callee_objects = &objects[&callee];
-                    for (param_idx, arg) in args.into_iter().enumerate() {
-                        if callee_objects
-                            .param_to_object(param_idx)
-                            .map(|object| {
-                                callee_objects.is_mutated(object)
-                                    || callee_objects.returned_objects().contains(&object)
-                            })
-                            .unwrap_or(false)
-                        {
-                            new_value.entry(*arg).or_default().insert(*inst);
-                        }
-                    }
-                    new_value.remove(inst);
-                }
-                _ => {}
-            }
-            changed |= *old_value != new_value;
-            lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
-        }
-
-        // Handle reduces in this block specially at the very end.
-        let last_pt = insts.len();
-        let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
-        let mut new_bottom_value = old_bottom_value.clone();
-        for inst in insts.iter() {
-            if let Node::Reduce {
-                control: _,
-                init: _,
-                reduct,
-            } = nodes[inst.idx()]
-            {
-                assert!(
-                    new_bottom_value.contains_key(&reduct),
-                    "PANIC: Can't handle clones inside a reduction cycle currently."
-                );
-                new_bottom_value.remove(inst);
-            }
-        }
-        changed |= *old_bottom_value != new_bottom_value;
-        lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
-    }
-
-    changed
-}
-
-/*
- * Helper function to induce a clone once an object with multiple users has been
- * found.
- */
-fn induce_clone(
-    editor: &mut FunctionEditor,
-    object: NodeID,
-    user: NodeID,
-    value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
-    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
-    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
-    rev_po: &Vec<NodeID>,
-    typing: &Vec<TypeID>,
-    control_subgraph: &Subgraph,
-    dom: &DomTree,
-    loops: &LoopTree,
-    bb_to_prefix_sum: &Vec<usize>,
-    bbs: &BasicBlocks,
-) {
-    // If `user` already used `object` and tries to use it again, then the
-    // clone is a "loop induced" clone. Otherwise, it's a simple clone.
-    if !value[&object].contains(&user) {
-        let success = editor.edit(|mut edit| {
-            // Create the constant collection object for allocation.
-            let object_ty = typing[object.idx()];
-            let object_cons = edit.add_zero_constant(object_ty);
-            let cons_node = edit.add_node(Node::Constant { id: object_cons });
-
-            // Create the clone into the new constant collection.
-            let clone_node = edit.add_node(Node::Write {
-                collect: cons_node,
-                data: object,
-                indices: vec![].into_boxed_slice(),
-            });
-
-            // Make user use the cloned object.
-            edit.replace_all_uses_where(object, clone_node, |id| *id == user)
-        });
-        assert!(success);
-    } else {
-        // Figure out where to place that phi. This is the deepest
-        // loop header where `user` is responsible for making `object` used
-        // used at the top of the block, and the block dominates the block
-        // containing `user`. If `user` is a phi, then the region it's
-        // attached to is excluded from eligibility.
-        let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
-            lattice[bb_to_prefix_sum[bb.idx()]]
-                .get(&object)
-                .unwrap_or(&BTreeSet::new())
-                .contains(&user)
-                && dom.does_dom(*bb, bbs.0[user.idx()])
-                && loops.contains(*bb)
-                && loops.is_in_loop(*bb, bbs.0[user.idx()])
-                && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
-        });
-        let top_block = eligible_blocks
-            .max_by_key(|bb| loops.nesting(*bb).unwrap())
-            .unwrap();
-        assert!(editor.func().nodes[top_block.idx()].is_region());
-
-        // Figure out the users of `object` that we need to phi back
-        // upwards. Assign each user a number indicating how far down the
-        // user chain it is, higher is farther down. This is used for
-        // picking the most downstream user later.
-        let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
-        let mut workset: BTreeSet<NodeID> = BTreeSet::new();
-        workset.insert(object);
-        let mut chain_ordering = 1;
-        assert!(!super_value.is_empty());
-        while let Some(pop) = workset.pop_first() {
-            let iterated_users: BTreeSet<_> = super_value
-                .get(&pop)
-                .map(|users| users.into_iter())
-                .into_iter()
-                .flatten()
-                .map(|id| *id)
-                .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
-                .collect();
-            workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
-            for user in iterated_users {
-                *users.entry(user).or_default() = chain_ordering;
-                chain_ordering += 1;
-            }
-        }
-
-        // The fringe users may not dominate any predecessors of the loop
-        // header. The following is some Juno code that exposes this:
-        //
-        // fn problematic(a : size) -> i32 {
-        //   for i = 0 to a {
-        //     let arr : i32[1];
-        //     for j = 0 to a {
-        //       arr[0] = 1;
-        //     }
-        //   }
-        //   return 0;
-        // }
-        //
-        // Note that `arr` induces a clone each iteration, since its value
-        // needs to be reset to all zeros. However, it should also be noted
-        // that the most fringe user of `arr`, the write inside the inner
-        // loop, does not dominate the bottom of the outer loop. Thus, we
-        // need to insert a phi in the bottom block of the outer loop to
-        // retrieve either the write, or `arr` before the inner loop. The
-        // general version of this problem requires the following solution.
-        // Our goal is to figure out which downstream user represents
-        // `object` at each block in the loop. We first assign each block
-        // containing a user the most downstream user it contains. Then, we
-        // create a dummy phi for every region (including the header) in the
-        // loop, which is the downstream user for that block. Then, every
-        // other block is assigned the downstream user of its single
-        // predecessor. This basically amounts to recreating SSA for
-        // `object` inside the loop.
-        let mut user_per_loop_bb = BTreeMap::new();
-        let mut added_phis = BTreeMap::new();
-        let mut top_phi = NodeID::new(0);
-        // Assign existing users.
-        for (user, ordering) in users.iter() {
-            let bb = bbs.0[user.idx()];
-            if let Some(old_user) = user_per_loop_bb.get(&bb)
-                && users[old_user] > *ordering
-            {
-            } else {
-                user_per_loop_bb.insert(bb, *user);
-            }
-        }
-        // Assign dummy phis.
-        for bb in loops.nodes_in_loop(top_block) {
-            if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
-                && editor.func().nodes[bb.idx()].is_region()
-            {
-                let success = editor.edit(|mut edit| {
-                    let phi_node = edit.add_node(Node::Phi {
-                        control: bb,
-                        data: empty().collect(),
-                    });
-                    if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
-                        user_per_loop_bb.insert(bb, phi_node);
-                    }
-                    if bb == top_block {
-                        top_phi = phi_node;
-                    }
-                    added_phis.insert(phi_node, bb);
-                    Ok(edit)
-                });
-                assert!(success);
-            }
-        }
-        // Assign users for the rest of the blocks.
-        for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
-            if !user_per_loop_bb.contains_key(&bb) {
-                assert!(control_subgraph.preds(*bb).count() == 1);
-                user_per_loop_bb.insert(
-                    *bb,
-                    user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
-                );
-            }
-        }
-
-        // Induce the clone.
-        let success = editor.edit(|mut edit| {
-            // Create the constant collection object for allocation.
-            let object_ty = typing[object.idx()];
-            let object_cons = edit.add_zero_constant(object_ty);
-            let cons_node = edit.add_node(Node::Constant { id: object_cons });
-
-            // Create the phis.
-            let mut phi_map = BTreeMap::new();
-            let mut real_phis = BTreeSet::new();
-            for (dummy, bb) in added_phis {
-                let real = edit.add_node(Node::Phi {
-                    control: bb,
-                    data: control_subgraph
-                        .preds(bb)
-                        .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
-                        .collect(),
-                });
-                phi_map.insert(dummy, real);
-                real_phis.insert(real);
-            }
-
-            // Create the clone into the phi.
-            let real_top_phi = phi_map[&top_phi];
-            let clone_node = edit.add_node(Node::Write {
-                collect: real_top_phi,
-                data: object,
-                indices: vec![].into_boxed_slice(),
-            });
-
-            // Make users use the cloned object.
-            edit = edit.replace_all_uses_where(object, clone_node, |id| {
-                id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
-            })?;
-
-            // Get rid of the dummy phis.
-            for (dummy, real) in phi_map {
-                edit = edit.replace_all_uses(dummy, real)?;
-                edit = edit.delete_node(dummy)?;
-            }
-
-            // Make phis use the clone instead of the top phi.
-            edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
-        });
-        assert!(success);
-
-        // De-duplicate phis.
-        gvn(editor, false);
-
-        // Get rid of unused phis.
-        dce(editor);
-
-        // Simplify phis.
-        phi_elim(editor);
-    }
-}
-
-/*
- * Helper function to analyze lattice values at each program point and find
- * multiple dynamic users of a single write. Return as soon as any clone is
- * found.
- */
-fn find_clones(
-    editor: &mut FunctionEditor,
-    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
-    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
-    rev_po: &Vec<NodeID>,
-    typing: &Vec<TypeID>,
-    control_subgraph: &Subgraph,
-    dom: &DomTree,
-    loops: &LoopTree,
-    objects: &CollectionObjects,
-    bb_to_prefix_sum: &Vec<usize>,
-    bbs: &BasicBlocks,
-) -> bool {
-    let nodes = &editor.func().nodes;
-    let func_id = editor.func_id();
-    for bb in rev_po.iter().rev() {
-        let insts = &bbs.1[bb.idx()];
-        // Accumulate predecessor bottom used sets for phis. Phis are special in
-        // that they need to be path sensitive, but multiple phis in a single
-        // block may use a single collection, and that needs to induce a clone.
-        let mut phi_acc_bottoms = BTreeMap::new();
-        for (prev_pt, inst) in insts.iter().enumerate() {
-            let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
-            match nodes[inst.idx()] {
-                Node::Phi {
-                    control: _,
-                    ref data,
-                } => {
-                    // In phis, check if an argument is already used in the
-                    // predecessor's bottom lattice value (phis need to be path-
-                    // sensitive).
-                    for (pred, arg) in zip(control_subgraph.preds(*bb), data) {
-                        let bottom = phi_acc_bottoms.entry(pred).or_insert_with(|| {
-                            let last_pt = bbs.1[pred.idx()].len();
-                            let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt];
-                            bottom.clone()
-                        });
-                        if bottom.contains_key(&arg) {
-                            induce_clone(
-                                editor,
-                                *arg,
-                                *inst,
-                                bottom,
-                                super_value,
-                                lattice,
-                                rev_po,
-                                typing,
-                                control_subgraph,
-                                dom,
-                                loops,
-                                bb_to_prefix_sum,
-                                bbs,
-                            );
-                            return true;
-                        } else {
-                            // Subsequent phis using `arg` along the same
-                            // predecessor induce a clone.
-                            bottom.insert(*arg, once(*inst).collect());
-                        }
-                    }
-                }
-                Node::Ternary {
-                    op: TernaryOperator::Select,
-                    first: _,
-                    second,
-                    third,
-                } => {
-                    if value.contains_key(&second) {
-                        induce_clone(
-                            editor,
-                            second,
-                            *inst,
-                            &value,
-                            super_value,
-                            lattice,
-                            rev_po,
-                            typing,
-                            control_subgraph,
-                            dom,
-                            loops,
-                            bb_to_prefix_sum,
-                            bbs,
-                        );
-                        return true;
-                    }
-                    if value.contains_key(&third) {
-                        induce_clone(
-                            editor,
-                            third,
-                            *inst,
-                            &value,
-                            super_value,
-                            lattice,
-                            rev_po,
-                            typing,
-                            control_subgraph,
-                            dom,
-                            loops,
-                            bb_to_prefix_sum,
-                            bbs,
-                        );
-                        return true;
-                    }
-                }
-                Node::Reduce {
-                    control: _,
-                    init,
-                    reduct: _,
-                } => {
-                    if value.contains_key(&init) {
-                        induce_clone(
-                            editor,
-                            init,
-                            *inst,
-                            &value,
-                            super_value,
-                            lattice,
-                            rev_po,
-                            typing,
-                            control_subgraph,
-                            dom,
-                            loops,
-                            bb_to_prefix_sum,
-                            bbs,
-                        );
-                        return true;
-                    }
-                }
-                Node::Read {
-                    collect,
-                    indices: _,
-                } if !objects[&func_id].objects(*inst).is_empty() => {
-                    if value.contains_key(&collect) {
-                        induce_clone(
-                            editor,
-                            collect,
-                            *inst,
-                            &value,
-                            super_value,
-                            lattice,
-                            rev_po,
-                            typing,
-                            control_subgraph,
-                            dom,
-                            loops,
-                            bb_to_prefix_sum,
-                            bbs,
-                        );
-                        return true;
-                    }
-                }
-                Node::Write {
-                    collect,
-                    data: _,
-                    indices: _,
-                } => {
-                    if value.contains_key(&collect) {
-                        induce_clone(
-                            editor,
-                            collect,
-                            *inst,
-                            &value,
-                            super_value,
-                            lattice,
-                            rev_po,
-                            typing,
-                            control_subgraph,
-                            dom,
-                            loops,
-                            bb_to_prefix_sum,
-                            bbs,
-                        );
-                        return true;
-                    }
-                }
-                Node::Call {
-                    control: _,
-                    function: callee,
-                    dynamic_constants: _,
-                    ref args,
-                } => {
-                    let callee_objects = &objects[&callee];
-                    for (param_idx, arg) in args.into_iter().enumerate() {
-                        if callee_objects
-                            .param_to_object(param_idx)
-                            .map(|object| {
-                                callee_objects.is_mutated(object)
-                                    || callee_objects.returned_objects().contains(&object)
-                            })
-                            .unwrap_or(false)
-                            && value.contains_key(arg)
-                        {
-                            induce_clone(
-                                editor,
-                                *arg,
-                                *inst,
-                                value,
-                                super_value,
-                                lattice,
-                                rev_po,
-                                typing,
-                                control_subgraph,
-                                dom,
-                                loops,
-                                bb_to_prefix_sum,
-                                bbs,
-                            );
-                            return true;
-                        }
-                    }
-                }
-                _ => {}
-            }
-        }
-    }
-    false
-}
diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs
index ed658b22481aa1678a5fe9b47c8ccaad84d47e57..08d183a7f8c2ddfc650bb1ef3ce385761a137def 100644
--- a/hercules_opt/src/lib.rs
+++ b/hercules_opt/src/lib.rs
@@ -1,17 +1,17 @@
 #![feature(let_chains)]
 
 pub mod ccp;
-pub mod clone_elim;
 pub mod dce;
 pub mod delete_uncalled;
 pub mod editor;
+pub mod float_collections;
 pub mod fork_concat_split;
 pub mod fork_guard_elim;
 pub mod forkify;
+pub mod gcm;
 pub mod gvn;
 pub mod inline;
 pub mod interprocedural_sroa;
-pub mod legalize_reference_semantics;
 pub mod outline;
 pub mod pass;
 pub mod phi_elim;
@@ -22,17 +22,17 @@ pub mod unforkify;
 pub mod utils;
 
 pub use crate::ccp::*;
-pub use crate::clone_elim::*;
 pub use crate::dce::*;
 pub use crate::delete_uncalled::*;
 pub use crate::editor::*;
+pub use crate::float_collections::*;
 pub use crate::fork_concat_split::*;
 pub use crate::fork_guard_elim::*;
 pub use crate::forkify::*;
+pub use crate::gcm::*;
 pub use crate::gvn::*;
 pub use crate::inline::*;
 pub use crate::interprocedural_sroa::*;
-pub use crate::legalize_reference_semantics::*;
 pub use crate::outline::*;
 pub use crate::pass::*;
 pub use crate::phi_elim::*;
diff --git a/hercules_opt/src/outline.rs b/hercules_opt/src/outline.rs
index 84cedb7629e45abb3abad5eac841b24f224a2698..239838334801c7048a3bcaaad0cd626f2c9d0dea 100644
--- a/hercules_opt/src/outline.rs
+++ b/hercules_opt/src/outline.rs
@@ -567,9 +567,7 @@ pub fn outline(
 }
 
 /*
- * Just outlines all of a function except the entry, return, and aggregate
- * constants. This is the minimum work needed to cause runtime Rust code to be
- * generated as necessary.
+ * Just outlines all of a function except the start and return nodes.
  */
 pub fn dumb_outline(
     editor: &mut FunctionEditor,
@@ -585,11 +583,7 @@ pub fn dumb_outline(
         .node_ids()
         .filter(|id| {
             let node = &editor.func().nodes[id.idx()];
-            if let Node::Constant { id } = editor.func().nodes[id.idx()] {
-                editor.get_constant(id).is_scalar()
-            } else {
-                !(node.is_start() || node.is_parameter() || node.is_return())
-            }
+            !(node.is_start() || node.is_parameter() || node.is_return())
         })
         .collect();
     outline(
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index b136fef9c704b82c46877ed0c1d9c39bd7df793a..57fd464de82a36cab4c2d7c3758bd43b3d13eae3 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -39,8 +39,8 @@ pub enum Pass {
     ForkSplit,
     Unforkify,
     InferSchedules,
-    LegalizeReferenceSemantics,
-    CloneElim,
+    GCM,
+    FloatCollections,
     Verify,
     // Parameterized over whether analyses that aid visualization are necessary.
     // Useful to set to false if displaying a potentially broken module.
@@ -750,7 +750,7 @@ impl PassManager {
                     }
                     self.clear_analyses();
                 }
-                Pass::LegalizeReferenceSemantics => loop {
+                Pass::GCM => loop {
                     self.make_def_uses();
                     self.make_reverse_postorders();
                     self.make_typing();
@@ -782,7 +782,7 @@ impl PassManager {
                             &types_ref,
                             &def_uses[idx],
                         );
-                        if let Some(bb) = legalize_reference_semantics(
+                        if let Some(bb) = gcm(
                             &mut editor,
                             &def_uses[idx],
                             &reverse_postorders[idx],
@@ -808,30 +808,41 @@ impl PassManager {
                         break;
                     }
                 },
-                Pass::CloneElim => {
+                Pass::FloatCollections => {
                     self.make_def_uses();
+                    self.make_typing();
+                    self.make_callgraph();
                     let def_uses = self.def_uses.as_ref().unwrap();
-                    for idx in 0..self.module.functions.len() {
-                        let constants_ref =
-                            RefCell::new(std::mem::take(&mut self.module.constants));
-                        let dynamic_constants_ref =
-                            RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
-                        let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
-                        let mut editor = FunctionEditor::new(
-                            &mut self.module.functions[idx],
+                    let typing = self.typing.as_ref().unwrap();
+                    let callgraph = self.callgraph.as_ref().unwrap();
+                    let devices = device_placement(&self.module.functions, &callgraph);
+                    let constants_ref = RefCell::new(std::mem::take(&mut self.module.constants));
+                    let dynamic_constants_ref =
+                        RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
+                    let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
+                    let mut editors: Vec<_> = zip(
+                        self.module.functions.iter_mut().enumerate(),
+                        def_uses.iter(),
+                    )
+                    .map(|((idx, func), def_use)| {
+                        FunctionEditor::new(
+                            func,
                             FunctionID::new(idx),
                             &constants_ref,
                             &dynamic_constants_ref,
                             &types_ref,
-                            &def_uses[idx],
-                        );
-                        clone_elim(&mut editor);
+                            def_use,
+                        )
+                    })
+                    .collect();
+                    float_collections(&mut editors, typing, callgraph, &devices);
 
-                        self.module.constants = constants_ref.take();
-                        self.module.dynamic_constants = dynamic_constants_ref.take();
-                        self.module.types = types_ref.take();
+                    self.module.constants = constants_ref.take();
+                    self.module.dynamic_constants = dynamic_constants_ref.take();
+                    self.module.types = types_ref.take();
 
-                        self.module.functions[idx].delete_gravestones();
+                    for func in self.module.functions.iter_mut() {
+                        func.delete_gravestones();
                     }
                     self.clear_analyses();
                 }
diff --git a/juno_frontend/src/lib.rs b/juno_frontend/src/lib.rs
index ae6a74211272d9a557fd42c348fe2d0a94cf1a1e..9297173d4deededf8b541dfc821e1b88dd68841d 100644
--- a/juno_frontend/src/lib.rs
+++ b/juno_frontend/src/lib.rs
@@ -191,8 +191,6 @@ pub fn compile_ir(
     add_pass!(pm, verify, Unforkify);
     add_pass!(pm, verify, GVN);
     add_verified_pass!(pm, verify, DCE);
-    add_pass!(pm, verify, LegalizeReferenceSemantics);
-    add_pass!(pm, verify, CloneElim);
     add_pass!(pm, verify, DCE);
     add_pass!(pm, verify, Outline);
     add_pass!(pm, verify, InterproceduralSROA);
@@ -203,9 +201,10 @@ pub fn compile_ir(
         pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
     }
 
-    add_pass!(pm, verify, LegalizeReferenceSemantics);
+    add_pass!(pm, verify, GCM);
     add_verified_pass!(pm, verify, DCE);
-    add_pass!(pm, verify, LegalizeReferenceSemantics);
+    add_pass!(pm, verify, FloatCollections);
+    add_pass!(pm, verify, GCM);
     pm.add_pass(hercules_opt::pass::Pass::Codegen(output_dir, module_name));
     pm.run_passes();
 
diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn
index 67bdd44a68de51a9acd1a242c0b9bb40ad983fcd..882e5abc51bb596be63512eaff96434a2c45d43a 100644
--- a/juno_samples/implicit_clone/src/implicit_clone.jn
+++ b/juno_samples/implicit_clone/src/implicit_clone.jn
@@ -90,6 +90,7 @@ fn tricky3_loop_implicit_clone(a : usize, b : usize) -> usize {
     for kk = 0 to 10 {
       arr2[kk] += arr1[kk];
     }
+    x += arr2[1];
   }
   return x;
 }
@@ -112,7 +113,7 @@ fn no_implicit_clone(input : i32) -> i32 {
 }
 
 #[entry]
-fn complex_implicit_clone(input : i32) -> i32 {
+fn mirage_implicit_clone(input : i32) -> i32 {
   let arr1 : i32[2];
   let arr2 : i32[2];
   let arr3 : i32[2];
diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs
index a46a67280c96aade3a056fe594443122972394e2..a92e4e2d7558733b28db0bba203415a73631ab6d 100644
--- a/juno_samples/implicit_clone/src/main.rs
+++ b/juno_samples/implicit_clone/src/main.rs
@@ -29,13 +29,13 @@ fn main() {
 
         let output = tricky3_loop_implicit_clone(5, 7).await;
         println!("{}", output);
-        assert_eq!(output, 0);
+        assert_eq!(output, 7);
 
         let output = no_implicit_clone(4).await;
         println!("{}", output);
         assert_eq!(output, 13);
 
-        let output = complex_implicit_clone(73).await;
+        let output = mirage_implicit_clone(73).await;
         println!("{}", output);
         assert_eq!(output, 843);
     });