extern crate bitvec;
extern crate hercules_cg;
extern crate hercules_ir;

use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
use std::iter::{empty, once, zip, FromIterator};

use self::bitvec::prelude::*;

use self::hercules_cg::*;
use self::hercules_ir::*;

use crate::*;

/*
 * Top level function to legalize the reference semantics of a Hercules IR
 * function. Hercules IR is a value semantics representation, meaning that all
 * program state is in the form of copyable values, and mutation takes place by
 * making a new value that is a copy of the old value with some modification.
 * This representation is extremely convenient for optimization, but is not good
 * for code generation, where we need to generate code with references to get
 * good performance. Hercules IR can alternatively be interpreted using
 * reference semantics, where pointers to collection objects are passed around,
 * read from, and written to. However, the value semantics and reference
 * semantics interpretation of a Hercules IR function may not be equal - this
 * pass transforms a Hercules IR function such that its new value semantics is
 * the same as its old value semantics and that its new reference semantics is
 * the same as its new value semantics. This pass returns a placement of nodes
 * into ordered basic blocks, since the reference semantics of a function
 * depends on the order of execution with respect to anti-dependencies. Clones
 * are inserted sparingly when there are two write users of a single collection
 * or if a read user cannot be scheduled before a write user.
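 *
 * As a rough illustration (hypothetical Juno-like code, not from an actual
 * test case):
 *
 *   fn example() -> i32 {
 *     let arr : i32[1];
 *     let before = arr[0];
 *     arr[0] = 1;
 *     return before;
 *   }
 *
 * Under value semantics `before` is 0, since the read uses the collection
 * value produced before the write. Under reference semantics the read only
 * sees 0 if it actually executes before the write, which the unordered IR
 * does not by itself guarantee. Scheduling the read before the write (via an
 * anti-dependency edge) or cloning `arr` before the write preserves the
 * value semantics result.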
 */
pub fn legalize_reference_semantics(
    editor: &mut FunctionEditor,
    def_use: &ImmutableDefUseMap,
    reverse_postorder: &Vec<NodeID>,
    typing: &Vec<TypeID>,
    control_subgraph: &Subgraph,
    dom: &DomTree,
    fork_join_map: &HashMap<NodeID, NodeID>,
    loops: &LoopTree,
    objects: &CollectionObjects,
) -> Option<BasicBlocks> {
    // Repeatedly try to place nodes into basic blocks. If clones are induced,
    // re-try. Specifically, repeat the following procedure until no new clones:
    //
    // 1. Attempt to place nodes in basic blocks. If a node can't be placed due
    //    to anti-dependency edges, induce a clone on the read and go back to
    //    step 1.
    // 2. Check for any write-induced clones. If there are any, go back to step
    //    1.
    //
    // Since each analysis needs to be re-calculated in each iteration, this
    // function just implements the body of the described loop. The re-try logic
    // is found in pass.rs. When a re-try is needed, no basic block assignment
    // is returned. When a re-try isn't needed (no new clones were found), a
    // basic block assignment is returned.
    let bbs = match basic_blocks(
        editor.func(),
        editor.func_id(),
        def_use,
        reverse_postorder,
        control_subgraph,
        dom,
        loops,
        fork_join_map,
        objects,
    ) {
        Ok(bbs) => bbs,
        Err((obj, reader)) => {
            todo!();
            return None;
        }
    };
    if materialize_clones(editor, typing, control_subgraph, dom, loops, objects, &bbs) {
        None
    } else {
        Some(bbs)
    }
}

/*
 * Top level global code motion function. Assigns each data node to one of its
 * immediate control use / user nodes, forming (unordered) basic blocks. Returns
 * the control node / basic block each node is in. Based on the schedule-
 * early-schedule-late method from Cliff Click's PhD thesis. May fail if an
 * anti-dependency edge can't be satisfied - in this case, a clone that has
 * to be induced is
 * returned instead.
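 *
 * Roughly: each data node first gets a "schedule early" block, the lowest
 * block in the dominator tree that dominates the blocks of all of its uses,
 * and then a "schedule late" block, the least common ancestor of the blocks
 * of all of its users. The node's final block is chosen on the dominator
 * chain between the late and early blocks.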
 */
fn basic_blocks(
    function: &Function,
    func_id: FunctionID,
    def_use: &ImmutableDefUseMap,
    reverse_postorder: &Vec<NodeID>,
    control_subgraph: &Subgraph,
    dom: &DomTree,
    loops: &LoopTree,
    fork_join_map: &HashMap<NodeID, NodeID>,
    objects: &CollectionObjects,
) -> Result<BasicBlocks, (NodeID, NodeID)> {
    let mut bbs: Vec<Option<NodeID>> = vec![None; function.nodes.len()];

    // Step 1: assign the basic block locations of all nodes that must be in a
    // specific block. This includes control nodes as well as some special data
    // nodes, such as phis.
    for idx in 0..function.nodes.len() {
        match function.nodes[idx] {
            Node::Phi { control, data: _ } => bbs[idx] = Some(control),
            Node::ThreadID {
                control,
                dimension: _,
            } => bbs[idx] = Some(control),
            Node::Reduce {
                control,
                init: _,
                reduct: _,
            } => bbs[idx] = Some(control),
            Node::Call {
                control,
                function: _,
                dynamic_constants: _,
                args: _,
            } => bbs[idx] = Some(control),
            Node::Parameter { index: _ } => bbs[idx] = Some(NodeID::new(0)),
            Node::Constant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
            Node::DynamicConstant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
            _ if function.nodes[idx].is_control() => bbs[idx] = Some(NodeID::new(idx)),
            _ => {}
        }
    }

    // Step 2: schedule early. Place nodes in the earliest position they could
    // go - use a worklist to iterate over the nodes.
    let mut schedule_early = bbs.clone();
    let mut worklist = VecDeque::from(reverse_postorder.clone());
    while let Some(id) = worklist.pop_front() {
        if schedule_early[id.idx()].is_some() {
            continue;
        }

        // For every use, check what block is its "schedule early" block. This
        // node goes in the lowest block amongst those blocks.
        let use_places: Option<Vec<NodeID>> = get_uses(&function.nodes[id.idx()])
            .as_ref()
            .into_iter()
            .map(|id| *id)
            .map(|id| schedule_early[id.idx()])
            .collect();
        if let Some(use_places) = use_places {
            // If every use has been placed, we can place this node at the
            // lowest point in the dom tree that dominates all of the use places.
            let lowest = dom.lowest_amongst(use_places.into_iter());
            schedule_early[id.idx()] = Some(lowest);
        } else {
            // If not, then just push this node back on the worklist.
            worklist.push_back(id);
        }
    }

    // Step 3: find anti-dependence edges. An anti-dependence edge needs to be
    // drawn between a collection reading node and a collection mutating node
    // when the following conditions are true:
    //
    // 1: The reading and mutating nodes may involve the same collection.
    // 2: The node producing the collection used by the reading node is in a
    //    schedule early block that dominates the schedule early block of the
    //    mutating node. The node producing the collection used by the reading
    //    node may be an originator of a collection, a phi or reduce, or a
    //    mutator, but not a forwarding read - forwarding reads are collapsed,
    //    and the
    //    bottom read is treated as reading from the transitive parent of the
    //    forwarding read(s).
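    //
    // For example (illustrative): if node R reads collection C, node M mutates
    // C, and C's producer is scheduled early in a block dominating M's schedule
    // early block, then the anti-dependence edge (R, M) is recorded so that R
    // is ordered no later than M and still observes the pre-mutation contents
    // of C.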
    let mut antideps = BTreeSet::new();
    for id in reverse_postorder.iter() {
        // Find a terminating read node and the collections it reads.
        let terminating_reads: BTreeSet<_> =
            terminating_reads(function, func_id, *id, objects).collect();
        if !terminating_reads.is_empty() {
            // Walk forwarding reads to find anti-dependency roots.
            let mut workset = terminating_reads.clone();
            let mut roots = BTreeSet::new();
            while let Some(pop) = workset.pop_first() {
                let forwarded: BTreeSet<_> =
                    forwarding_reads(function, func_id, pop, objects).collect();
                if forwarded.is_empty() {
                    roots.insert(pop);
                } else {
                    workset.extend(forwarded);
                }
            }

            // For each root, find mutating nodes dominated by the root that
            // modify an object read on any input of the current node (the
            // terminating read).
            // TODO: make this less outrageously inefficient.
            let func_objects = &objects[&func_id];
            for root in roots.iter() {
                let root_early = schedule_early[root.idx()].unwrap();
                let mut root_block_iterated_users: BTreeSet<NodeID> = BTreeSet::new();
                let mut workset = BTreeSet::new();
                workset.insert(*root);
                while let Some(pop) = workset.pop_first() {
                    let users = def_use.get_users(pop).into_iter().filter(|user| {
                        !function.nodes[user.idx()].is_phi()
                            && !function.nodes[user.idx()].is_reduce()
                            && schedule_early[user.idx()].unwrap() == root_early
                    });
                    workset.extend(users.clone());
                    root_block_iterated_users.extend(users);
                }
                let read_objs: BTreeSet<_> = terminating_reads
                    .iter()
                    .map(|read_use| func_objects.objects(*read_use).into_iter())
                    .flatten()
                    .map(|id| *id)
                    .collect();
                for mutator in reverse_postorder.iter() {
                    let mutator_early = schedule_early[mutator.idx()].unwrap();
                    if dom.does_dom(root_early, mutator_early)
                        && (root_early != mutator_early
                            || root_block_iterated_users.contains(&mutator))
                        && mutating_objects(function, func_id, *mutator, objects)
                            .any(|mutated| read_objs.contains(&mutated))
                        && id != mutator
                    {
                        antideps.insert((*id, *mutator));
                    }
                }
            }
        }
    }
    let mut antideps_uses = vec![vec![]; function.nodes.len()];
    let mut antideps_users = vec![vec![]; function.nodes.len()];
    for (reader, mutator) in antideps.iter() {
        antideps_uses[mutator.idx()].push(*reader);
        antideps_users[reader.idx()].push(*mutator);
    }

    // Step 4: schedule late and pick each node's final position. Since the late
    // schedule of each node depends on the final positions of its users, these
    // two steps must be fused. Compute each node's latest position, then use the
    // control dependent + shallow loop heuristic to actually place it.
    let join_fork_map: HashMap<NodeID, NodeID> = fork_join_map
        .into_iter()
        .map(|(fork, join)| (*join, *fork))
        .collect();
    let mut worklist = VecDeque::from_iter(reverse_postorder.into_iter().map(|id| *id).rev());
    while let Some(id) = worklist.pop_front() {
        if bbs[id.idx()].is_some() {
            continue;
        }

        // Calculate the least common ancestor of user blocks, a.k.a. the "late"
        // schedule.
        let calculate_lca = || -> Option<_> {
            let mut lca = None;
            // Helper to incrementally update the LCA.
            let mut update_lca = |a| {
                if let Some(acc) = lca {
                    lca = Some(dom.least_common_ancestor(acc, a));
                } else {
                    lca = Some(a);
                }
            };

            // For every user, consider where we need to be to directly dominate the
            // user.
            for user in def_use
                .get_users(id)
                .as_ref()
                .into_iter()
                .chain(antideps_users[id.idx()].iter())
                .map(|id| *id)
            {
                if let Node::Phi { control, data } = &function.nodes[user.idx()] {
                    // For phis, we need to dominate the block jumping to the phi in
                    // the slot that corresponds to our use.
                    for (control, data) in
                        zip(get_uses(&function.nodes[control.idx()]).as_ref(), data)
                    {
                        if id == *data {
                            update_lca(*control);
                        }
                    }
                } else if let Node::Reduce {
                    control,
                    init,
                    reduct,
                } = &function.nodes[user.idx()]
                {
                    // For reduces, we need to either dominate the block right
                    // before the fork if we're the init input, or we need to
                    // dominate the join if we're the reduct input.
                    if id == *init {
                        let before_fork = function.nodes[join_fork_map[control].idx()]
                            .try_fork()
                            .unwrap()
                            .0;
                        update_lca(before_fork);
                    } else {
                        assert_eq!(id, *reduct);
                        update_lca(*control);
                    }
                } else {
                    // For everything else, we just need to dominate the user.
                    update_lca(bbs[user.idx()]?);
                }
            }

            Some(lca)
        };

        // Check if all users have been placed. If one of them hasn't, then add
        // this node back on to the worklist.
        let Some(lca) = calculate_lca() else {
            worklist.push_back(id);
            continue;
        };

        // Look between the LCA and the schedule early location to place the
        // node.
        let schedule_early = schedule_early[id.idx()].unwrap();
        let schedule_late = lca.unwrap_or(schedule_early);
        let mut chain = dom
            // If the node has no users, then it doesn't really matter where we
            // place it - just place it at the early placement.
            .chain(schedule_late, schedule_early);

        if let Some(mut location) = chain.next() {
            /*
            while let Some(control_node) = chain.next() {
                // If the next node further up the dominator tree is in a shallower
                // loop nest or if we can get out of a reduce loop when we don't
                // need to be in one, place this data node in a higher-up location.
                let old_nest = loops
                    .header_of(location)
                    .map(|header| loops.nesting(header).unwrap());
                let new_nest = loops
                    .header_of(control_node)
                    .map(|header| loops.nesting(header).unwrap());
                let shallower_nest = if let (Some(old_nest), Some(new_nest)) = (old_nest, new_nest)
                {
                    old_nest > new_nest
                } else {
                    // If the new location isn't a loop, its nesting level should
                    // be considered "shallower" if the current location is in a
                    // loop.
                    old_nest.is_some()
                };
                // This will move all nodes that don't need to be in reduce loops
                // outside of reduce loops. Nodes that do need to be in a reduce
                // loop use the reduce node forming the loop, so the dominator chain
                // will consist of one block, and this loop won't ever iterate.
                let currently_at_join = function.nodes[location.idx()].is_join();
                if shallower_nest || currently_at_join {
                    location = control_node;
                }
            }
            */

            bbs[id.idx()] = Some(location);
        } else {
            // If there is no valid location for this node, then it's a reading
            // node of a collection that can't be placed above a mutation it has
            // an anti-dependence with. Thus, a clone needs to be induced.
            todo!()
        }
    }
    let bbs: Vec<_> = bbs.into_iter().map(Option::unwrap).collect();

    // Step 5: determine the order of nodes inside each block. Use a worklist to
    // add nodes to blocks in an order that obeys dependencies.
    let mut order: Vec<Vec<NodeID>> = vec![vec![]; function.nodes.len()];
    let mut worklist = VecDeque::from_iter(
        reverse_postorder
            .into_iter()
            .filter(|id| !function.nodes[id.idx()].is_control()),
    );
    let mut visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
    let mut no_change_iters = 0;
    while no_change_iters <= worklist.len()
        && let Some(id) = worklist.pop_front()
    {
        let node = &function.nodes[id.idx()];
        if node.is_phi()
            || node.is_reduce()
            || get_uses(node)
                .as_ref()
                .into_iter()
                .chain(antideps_uses[id.idx()].iter())
                .all(|u| {
                    function.nodes[u.idx()].is_control()
                        || bbs[u.idx()] != bbs[id.idx()]
                        || visited[u.idx()]
                })
        {
            order[bbs[id.idx()].idx()].push(*id);
            visited.set(id.idx(), true);
            no_change_iters = 0;
        } else {
            worklist.push_back(id);
            no_change_iters += 1;
        }
    }

    if no_change_iters == 0 {
        Ok((bbs, order))
    } else {
        // If the worklist exited without finishing, then there's at least one
        // reading node of a collection that is in an anti-depend + normal depend
        // use cycle with a mutating node. This cycle must be broken by inducing
        // a clone.
        todo!()
    }
}

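/*
 * Helper to iterate the uses of a node that read a collection in a
 * "terminating" way - reads that produce a non-collection result, the `data`
 * input of a write when that input is itself a collection, and call arguments
 * whose corresponding parameter object is neither mutated by nor returned
 * from the callee.
 */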
fn terminating_reads<'a>(
    function: &'a Function,
    func_id: FunctionID,
    reader: NodeID,
    objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = NodeID> + 'a> {
    match function.nodes[reader.idx()] {
        Node::Read {
            collect,
            indices: _,
        } if objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
        Node::Write {
            collect: _,
            data,
            indices: _,
        } if !objects[&func_id].objects(data).is_empty() => Box::new(once(data)),
        Node::Call {
            control: _,
            function: callee,
            dynamic_constants: _,
            ref args,
        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
            let objects = &objects[&callee];
            let returns = objects.returned_objects();
            let param_obj = objects.param_to_object(idx)?;
            if !objects.is_mutated(param_obj) && !returns.contains(&param_obj) {
                Some(*arg)
            } else {
                None
            }
        })),
        _ => Box::new(empty()),
    }
}

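/*
 * Helper to iterate the uses of a node that read a collection in a
 * "forwarding" way - reads whose result is itself a collection, both data
 * inputs of a select producing a collection, and call arguments whose
 * corresponding parameter object is returned (but not mutated) by the callee.
 * A forwarding read doesn't end the chain - its user is treated as reading
 * from the forwarded collection's transitive parent.
 */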
fn forwarding_reads<'a>(
    function: &'a Function,
    func_id: FunctionID,
    reader: NodeID,
    objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = NodeID> + 'a> {
    match function.nodes[reader.idx()] {
        Node::Read {
            collect,
            indices: _,
        } if !objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
        Node::Ternary {
            op: TernaryOperator::Select,
            first: _,
            second,
            third,
        } if !objects[&func_id].objects(reader).is_empty() => {
            Box::new(once(second).chain(once(third)))
        }
        Node::Call {
            control: _,
            function: callee,
            dynamic_constants: _,
            ref args,
        } => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
            let objects = &objects[&callee];
            let returns = objects.returned_objects();
            let param_obj = objects.param_to_object(idx)?;
            if !objects.is_mutated(param_obj) && returns.contains(&param_obj) {
                Some(*arg)
            } else {
                None
            }
        })),
        _ => Box::new(empty()),
    }
}
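
/*
 * Helper to iterate the collection objects a node may mutate - the objects
 * referred to by the `collect` input of a write, and the objects referred to
 * by call arguments whose corresponding parameter object is mutated by the
 * callee.
 */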
fn mutating_objects<'a>(
    function: &'a Function,
    func_id: FunctionID,
    mutator: NodeID,
    objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = CollectionObjectID> + 'a> {
    match function.nodes[mutator.idx()] {
        Node::Write {
            collect,
            data: _,
            indices: _,
        } => Box::new(objects[&func_id].objects(collect).into_iter().map(|id| *id)),
        Node::Call {
            control: _,
            function: callee,
            dynamic_constants: _,
            ref args,
        } => Box::new(
            args.into_iter()
                .enumerate()
                .filter_map(move |(idx, arg)| {
                    let callee_objects = &objects[&callee];
                    let param_obj = callee_objects.param_to_object(idx)?;
                    if callee_objects.is_mutated(param_obj) {
                        Some(objects[&func_id].objects(*arg).into_iter().map(|id| *id))
                    } else {
                        None
                    }
                })
                .flatten(),
        ),
        _ => Box::new(empty()),
    }
}

/*
 * Top level function to materialize clones of collections. This transformation
 * eliminates the possibility of multiple independent writes (including dynamic
 * writes) to a single collection by introducing extra collection constants and
 * inserting explicit clones. This allows us to make the simplifying assumption
 * in the backend that collections have reference, rather than value, semantics.
 * The pass calling this function is mandatory for correctness.
 */
fn materialize_clones(
    editor: &mut FunctionEditor,
    typing: &Vec<TypeID>,
    control_subgraph: &Subgraph,
    dom: &DomTree,
    loops: &LoopTree,
    objects: &CollectionObjects,
    bbs: &BasicBlocks,
) -> bool {
    let rev_po = control_subgraph.rev_po(NodeID::new(0));
    let mut total_num_pts = 0;
    let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
    for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
        if idx == bb.idx() {
            bb_to_prefix_sum[idx] = total_num_pts;
            total_num_pts += insts.len() + 1;
        }
    }

    // Calculate two lattices - one that includes back edges, and one that
    // doesn't. We want to handle simple clones before loop induced clones, so
    // we first materialize clones based on the no-back-edges lattice, and then
    // based on the full lattice.
    let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
        vec![BTreeMap::new(); total_num_pts];
    used_collections_dataflow(
        editor,
        &mut no_back_edge_lattice,
        &rev_po,
        &bb_to_prefix_sum,
        control_subgraph,
        objects,
        bbs,
    );
    let mut super_value = BTreeMap::new();
    if find_clones(
        editor,
        &super_value,
        &no_back_edge_lattice,
        &rev_po,
        &typing,
        control_subgraph,
        dom,
        loops,
        objects,
        &bb_to_prefix_sum,
        bbs,
    ) {
        return true;
    }

    // After inducing simple clones, calculate the full lattice and materialize
    // any loop induced clones.
    let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
    loop {
        let changed = used_collections_dataflow(
            editor,
            &mut lattice,
            &rev_po,
            &bb_to_prefix_sum,
            control_subgraph,
            objects,
            bbs,
        );
        if !changed {
            break;
        }
    }
    for value in lattice.iter() {
        meet(&mut super_value, value);
    }
    find_clones(
        editor,
        &super_value,
        &lattice,
        &rev_po,
        &typing,
        control_subgraph,
        dom,
        loops,
        objects,
        &bb_to_prefix_sum,
        bbs,
    )
}

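/*
 * Helper to meet two lattice values in place: for each collection used on the
 * right, its set of possible users is unioned into the corresponding entry on
 * the left.
 */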
fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
    for (used, users) in right.into_iter() {
        left.entry(*used).or_default().extend(users.into_iter());
    }
}

/*
 * Helper function to run a single iteration of the used collections dataflow
 * analysis. Returns whether the lattice was changed. The lattice maps each
 * program point to a set of used values and their possible users. Top is the
 * state where no nodes are used yet.
 */
fn used_collections_dataflow(
    editor: &FunctionEditor,
    lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
    rev_po: &Vec<NodeID>,
    bb_to_prefix_sum: &Vec<usize>,
    control_subgraph: &Subgraph,
    objects: &CollectionObjects,
    bbs: &BasicBlocks,
) -> bool {
    // Run dataflow analysis to figure out which accesses to collections induce
    // clones. This dataflow analysis depends on basic block assignments and is
    // more analogous to standard dataflow analysis in CFG + SSA IRs. This is
    // the only place this form is used, so just hardcode it here.
    //
    // This forward dataflow analysis tracks which collections are used at each
    // program point, and by what user nodes. Collections are referred to using
    // node IDs. Specifically:
    //
    // - Phi - a phi node adds its inputs to the used set and removes itself
    //   from the used set. If a phi uses an ID that is used along the edge of
    //   the corresponding predecessor, a clone is induced.
    // - Select - a select node adds its inputs to the used set and removes
    //   itself from the used set. If either use is already used, a clone is
    //   induced.
    // - Reduce - a reduce node adds its inputs to the used set and removes
    //   itself from the used set. If the `init` input is already used, a clone
    //   is induced. If the `reduct` input is used at the end of the basic block
    //   containing the reduce, then a clone is induced. At the end of the basic
    //   block, the reduce removes itself from the used set.
    // - Read - a read node that reads a sub-collection from a collection,
    //   rather than reading a primitive type, adds its input to the used set
    //   and removes itself from the used set. If the `collect` input is already
    //   used, a clone is induced.
    // - Write - a write node adds its `collect` input to the used set and
    //   removes itself from the used set. If the `collect` input is already
    //   used, a clone is induced.
    // - Call - a call node adds any mutated input or input that may be returned
    //   to the used set and removes itself from the used set. If any mutated
    //   input is already used, a clone is induced.
    //
    // Reads of sub-collections (select, some read, and some call nodes) use a
    // collection because they may have downstream writes that depend on the new
    // "sub-view" of the same collection. This does not include reads that "end"
    // (most reads, some calls, the `data` input of a write). This analysis does
    // not consider parallel mutations in fork-joins.
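    //
    // For example (illustrative): if one collection constant feeds two write
    // nodes, the first write visited adds the constant to the used set, so the
    // second write sees it already used and `find_clones` will materialize a
    // clone for it.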
    let nodes = &editor.func().nodes;
    let func_id = editor.func_id();
    let mut changed = false;

    for bb in rev_po.iter() {
        // The lattice value of the first point is the meet of the
        // predecessor terminating lattice values.
        let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
        let mut new_top_value = BTreeMap::new();
        // Clearing `top_value` is not necessary since used nodes are never
        // removed from lattice values, only added.
        for pred in control_subgraph.preds(*bb) {
            let last_pt = bbs.1[pred.idx()].len();
            meet(
                &mut new_top_value,
                &lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
            );
        }
        changed |= *old_top_value != new_top_value;
        lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;

        // The lattice values of the following points are determined by their
        // immediately preceding instructions.
        let insts = &bbs.1[bb.idx()];
        for (prev_pt, inst) in insts.iter().enumerate() {
            let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
            let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
            let mut new_value = prev_value.clone();
            match nodes[inst.idx()] {
                Node::Phi {
                    control: _,
                    ref data,
                } if !objects[&func_id].objects(*inst).is_empty() => {
                    for elem in data {
                        new_value.entry(*elem).or_default().insert(*inst);
                    }
                    new_value.remove(inst);
                }
                Node::Ternary {
                    op: TernaryOperator::Select,
                    first: _,
                    second,
                    third,
                }
                | Node::Reduce {
                    control: _,
                    init: second,
                    reduct: third,
                } => {
                    if !objects[&func_id].objects(*inst).is_empty() {
                        new_value.entry(second).or_default().insert(*inst);
                        new_value.entry(third).or_default().insert(*inst);
                        new_value.remove(inst);
                    }
                }
                Node::Read {
                    collect,
                    indices: _,
                } if !objects[&func_id].objects(*inst).is_empty() => {
                    new_value.entry(collect).or_default().insert(*inst);
                    new_value.remove(inst);
                }
                Node::Write {
                    collect,
                    data: _,
                    indices: _,
                } => {
                    new_value.entry(collect).or_default().insert(*inst);
                    new_value.remove(inst);
                }
                Node::Call {
                    control: _,
                    function: callee,
                    dynamic_constants: _,
                    ref args,
                } => {
                    let callee_objects = &objects[&callee];
                    for (param_idx, arg) in args.into_iter().enumerate() {
                        if callee_objects
                            .param_to_object(param_idx)
                            .map(|object| {
                                callee_objects.is_mutated(object)
                                    || callee_objects.returned_objects().contains(&object)
                            })
                            .unwrap_or(false)
                        {
                            new_value.entry(*arg).or_default().insert(*inst);
                        }
                    }
                    new_value.remove(inst);
                }
                _ => {}
            }
            changed |= *old_value != new_value;
            lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
        }

        // Handle reduces in this block specially at the very end.
        let last_pt = insts.len();
        let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
        let mut new_bottom_value = old_bottom_value.clone();
        for inst in insts.iter() {
            if let Node::Reduce {
                control: _,
                init: _,
                reduct,
            } = nodes[inst.idx()]
            {
                assert!(
                    new_bottom_value.contains_key(&reduct),
                    "PANIC: Can't handle clones inside a reduction cycle currently."
                );
                new_bottom_value.remove(inst);
            }
        }
        changed |= *old_bottom_value != new_bottom_value;
        lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
    }

    changed
}

/*
 * Helper function to induce a clone once an object with multiple users has been
 * found.
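 * Two cases are handled. For a "simple" clone, `object` is copied into a
 * fresh zero constant collection and `user` is redirected to the copy. For a
 * "loop induced" clone, where `user` re-uses `object` across loop iterations,
 * phis are additionally built throughout the loop to thread the cloned
 * storage across iterations, with the clone at the loop header copying
 * `object` into that storage.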
 */
fn induce_clone(
    editor: &mut FunctionEditor,
    object: NodeID,
    user: NodeID,
    value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
    rev_po: &Vec<NodeID>,
    typing: &Vec<TypeID>,
    control_subgraph: &Subgraph,
    dom: &DomTree,
    loops: &LoopTree,
    bb_to_prefix_sum: &Vec<usize>,
    bbs: &BasicBlocks,
) {
    // If `user` already used `object` and tries to use it again, then the
    // clone is a "loop induced" clone. Otherwise, it's a simple clone.
    if !value[&object].contains(&user) {
        let success = editor.edit(|mut edit| {
            // Create the constant collection object for allocation.
            let object_ty = typing[object.idx()];
            let object_cons = edit.add_zero_constant(object_ty);
            let cons_node = edit.add_node(Node::Constant { id: object_cons });

            // Create the clone into the new constant collection.
            let clone_node = edit.add_node(Node::Write {
                collect: cons_node,
                data: object,
                indices: vec![].into_boxed_slice(),
            });

            // Make user use the cloned object.
            edit.replace_all_uses_where(object, clone_node, |id| *id == user)
        });
        assert!(success);
    } else {
        // Figure out where to place the top phi and clone. This is the deepest
        // loop header where `user` is responsible for making `object` used
        // at the top of the block, and the block dominates the block
        // containing `user`. If `user` is a phi, then the region it's
        // attached to is excluded from eligibility.
        let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
            lattice[bb_to_prefix_sum[bb.idx()]]
                .get(&object)
                .unwrap_or(&BTreeSet::new())
                .contains(&user)
                && dom.does_dom(*bb, bbs.0[user.idx()])
                && loops.contains(*bb)
                && loops.is_in_loop(*bb, bbs.0[user.idx()])
                && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
        });
        let top_block = eligible_blocks
            .max_by_key(|bb| loops.nesting(*bb).unwrap())
            .unwrap();
        assert!(editor.func().nodes[top_block.idx()].is_region());

        // Figure out the users of `object` that we need to phi back
        // upwards. Assign each user a number indicating how far down the
        // user chain it is, higher is farther down. This is used for
        // picking the most downstream user later.
        let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
        let mut workset: BTreeSet<NodeID> = BTreeSet::new();
        workset.insert(object);
        let mut chain_ordering = 1;
        assert!(!super_value.is_empty());
        while let Some(pop) = workset.pop_first() {
            let iterated_users: BTreeSet<_> = super_value
                .get(&pop)
                .map(|users| users.into_iter())
                .into_iter()
                .flatten()
                .map(|id| *id)
                .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
                .collect();
            workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
            for user in iterated_users {
                *users.entry(user).or_default() = chain_ordering;
                chain_ordering += 1;
            }
        }

        // The fringe users may not dominate any predecessors of the loop
        // header. The following is some Juno code that exposes this:
        //
        // fn problematic(a : size) -> i32 {
        //   for i = 0 to a {
        //     let arr : i32[1];
        //     for j = 0 to a {
        //       arr[0] = 1;
        //     }
        //   }
        //   return 0;
        // }
        //
        // Note that `arr` induces a clone each iteration, since its value
        // needs to be reset to all zeros. However, it should also be noted
        // that the most fringe user of `arr`, the write inside the inner
        // loop, does not dominate the bottom of the outer loop. Thus, we
        // need to insert a phi in the bottom block of the outer loop to
        // retrieve either the write, or `arr` before the inner loop. The
        // general version of this problem requires the following solution.
        // Our goal is to figure out which downstream user represents
        // `object` at each block in the loop. We first assign each block
        // containing a user the most downstream user it contains. Then, we
        // create a dummy phi for every region (including the header) in the
        // loop, which is the downstream user for that block. Then, every
        // other block is assigned the downstream user of its single
        // predecessor. This basically amounts to recreating SSA for
        // `object` inside the loop.
        let mut user_per_loop_bb = BTreeMap::new();
        let mut added_phis = BTreeMap::new();
        let mut top_phi = NodeID::new(0);
        // Assign existing users.
        for (user, ordering) in users.iter() {
            let bb = bbs.0[user.idx()];
            if let Some(old_user) = user_per_loop_bb.get(&bb)
                && users[old_user] > *ordering
            {
            } else {
                user_per_loop_bb.insert(bb, *user);
            }
        }
        // Assign dummy phis.
        for bb in loops.nodes_in_loop(top_block) {
            if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
                && editor.func().nodes[bb.idx()].is_region()
            {
                let success = editor.edit(|mut edit| {
                    let phi_node = edit.add_node(Node::Phi {
                        control: bb,
                        data: empty().collect(),
                    });
                    if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
                        user_per_loop_bb.insert(bb, phi_node);
                    }
                    if bb == top_block {
                        top_phi = phi_node;
                    }
                    added_phis.insert(phi_node, bb);
                    Ok(edit)
                });
                assert!(success);
            }
        }
        // Assign users for the rest of the blocks.
        for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
            if !user_per_loop_bb.contains_key(&bb) {
                assert!(control_subgraph.preds(*bb).count() == 1);
                user_per_loop_bb.insert(
                    *bb,
                    user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
                );
            }
        }

        // Induce the clone.
        let success = editor.edit(|mut edit| {
            // Create the constant collection object for allocation.
            let object_ty = typing[object.idx()];
            let object_cons = edit.add_zero_constant(object_ty);
            let cons_node = edit.add_node(Node::Constant { id: object_cons });

            // Create the phis.
            let mut phi_map = BTreeMap::new();
            let mut real_phis = BTreeSet::new();
            for (dummy, bb) in added_phis {
                let real = edit.add_node(Node::Phi {
                    control: bb,
                    data: control_subgraph
                        .preds(bb)
                        .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
                        .collect(),
                });
                phi_map.insert(dummy, real);
                real_phis.insert(real);
            }

            // Create the clone into the phi.
            let real_top_phi = phi_map[&top_phi];
            let clone_node = edit.add_node(Node::Write {
                collect: real_top_phi,
                data: object,
                indices: vec![].into_boxed_slice(),
            });

            // Make users use the cloned object.
            edit = edit.replace_all_uses_where(object, clone_node, |id| {
                id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
            })?;

            // Get rid of the dummy phis.
            for (dummy, real) in phi_map {
                edit = edit.replace_all_uses(dummy, real)?;
                edit = edit.delete_node(dummy)?;
            }

            // Make phis use the clone instead of the top phi.
            edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
        });
        assert!(success);

        // De-duplicate phis.
        gvn(editor, false);

        // Get rid of unused phis.
        dce(editor);

        // Simplify phis.
        phi_elim(editor);
    }
}

/*
 * Helper function to analyze lattice values at each program point and find
 * multiple dynamic users of a single collection. Returns as soon as any clone
 * is induced.
 */
fn find_clones(
    editor: &mut FunctionEditor,
    super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
    lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
    rev_po: &Vec<NodeID>,
    typing: &Vec<TypeID>,
    control_subgraph: &Subgraph,
    dom: &DomTree,
    loops: &LoopTree,
    objects: &CollectionObjects,
    bb_to_prefix_sum: &Vec<usize>,
    bbs: &BasicBlocks,
) -> bool {
    let nodes = &editor.func().nodes;
    let func_id = editor.func_id();
    for bb in rev_po.iter().rev() {
        let insts = &bbs.1[bb.idx()];
        // Accumulate predecessor bottom used sets for phis. Phis are special in
        // that they need to be path sensitive, but multiple phis in a single
        // block may use a single collection, and that needs to induce a clone.
        let mut phi_acc_bottoms = BTreeMap::new();
        for (prev_pt, inst) in insts.iter().enumerate() {
            let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
            match nodes[inst.idx()] {
                Node::Phi {
                    control: _,
                    ref data,
                } => {
                    // In phis, check if an argument is already used in the
                    // predecessor's bottom lattice value (phis need to be path-
                    // sensitive).
                    for (pred, arg) in zip(control_subgraph.preds(*bb), data) {
                        let bottom = phi_acc_bottoms.entry(pred).or_insert_with(|| {
                            let last_pt = bbs.1[pred.idx()].len();
                            let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt];
                            bottom.clone()
                        });
                        if bottom.contains_key(&arg) {
                            induce_clone(
                                editor,
                                *arg,
                                *inst,
                                bottom,
                                super_value,
                                lattice,
                                rev_po,
                                typing,
                                control_subgraph,
                                dom,
                                loops,
                                bb_to_prefix_sum,
                                bbs,
                            );
                            return true;
                        } else {
                            // Subsequent phis using `arg` along the same
                            // predecessor induce a clone.
                            bottom.insert(*arg, once(*inst).collect());
                        }
                    }
                }
                Node::Ternary {
                    op: TernaryOperator::Select,
                    first: _,
                    second,
                    third,
                } => {
                    if value.contains_key(&second) {
                        induce_clone(
                            editor,
                            second,
                            *inst,
                            &value,
                            super_value,
                            lattice,
                            rev_po,
                            typing,
                            control_subgraph,
                            dom,
                            loops,
                            bb_to_prefix_sum,
                            bbs,
                        );
                        return true;
                    }
                    if value.contains_key(&third) {
                        induce_clone(
                            editor,
                            third,
                            *inst,
                            &value,
                            super_value,
                            lattice,
                            rev_po,
                            typing,
                            control_subgraph,
                            dom,
                            loops,
                            bb_to_prefix_sum,
                            bbs,
                        );
                        return true;
                    }
                }
                Node::Reduce {
                    control: _,
                    init,
                    reduct: _,
                } => {
                    if value.contains_key(&init) {
                        induce_clone(
                            editor,
                            init,
                            *inst,
                            &value,
                            super_value,
                            lattice,
                            rev_po,
                            typing,
                            control_subgraph,
                            dom,
                            loops,
                            bb_to_prefix_sum,
                            bbs,
                        );
                        return true;
                    }
                }
                Node::Read {
                    collect,
                    indices: _,
                } if !objects[&func_id].objects(*inst).is_empty() => {
                    if value.contains_key(&collect) {
                        induce_clone(
                            editor,
                            collect,
                            *inst,
                            &value,
                            super_value,
                            lattice,
                            rev_po,
                            typing,
                            control_subgraph,
                            dom,
                            loops,
                            bb_to_prefix_sum,
                            bbs,
                        );
                        return true;
                    }
                }
                Node::Write {
                    collect,
                    data: _,
                    indices: _,
                } => {
                    if value.contains_key(&collect) {
                        induce_clone(
                            editor,
                            collect,
                            *inst,
                            &value,
                            super_value,
                            lattice,
                            rev_po,
                            typing,
                            control_subgraph,
                            dom,
                            loops,
                            bb_to_prefix_sum,
                            bbs,
                        );
                        return true;
                    }
                }
                Node::Call {
                    control: _,
                    function: callee,
                    dynamic_constants: _,
                    ref args,
                } => {
                    let callee_objects = &objects[&callee];
                    for (param_idx, arg) in args.into_iter().enumerate() {
                        if callee_objects
                            .param_to_object(param_idx)
                            .map(|object| {
                                callee_objects.is_mutated(object)
                                    || callee_objects.returned_objects().contains(&object)
                            })
                            .unwrap_or(false)
                            && value.contains_key(arg)
                        {
                            induce_clone(
                                editor,
                                *arg,
                                *inst,
                                value,
                                super_value,
                                lattice,
                                rev_po,
                                typing,
                                control_subgraph,
                                dom,
                                loops,
                                bb_to_prefix_sum,
                                bbs,
                            );
                            return true;
                        }
                    }
                }
                _ => {}
            }
        }
    }
    false
}