extern crate bitvec;
extern crate hercules_cg;
extern crate hercules_ir;
use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
use std::iter::{empty, once, zip, FromIterator};
use self::bitvec::prelude::*;
use self::hercules_cg::*;
use self::hercules_ir::*;
use crate::*;
/*
* Top level function to legalize the reference semantics of a Hercules IR
* function. Hercules IR is a value semantics representation, meaning that all
* program state is in the form of copyable values, and mutation takes place by
* making a new value that is a copy of the old value with some modification.
* This representation is extremely convenient for optimization, but is not good
* for code generation, where we need to generate code with references to get
* good performance. Hercules IR can alternatively be interpreted using
* reference semantics, where pointers to collection objects are passed around,
* read from, and written to. However, the value semantics and reference
* semantics interpretations of a Hercules IR function may not be equal - this
* pass transforms a Hercules IR function such that its new value semantics is
* the same as its old value semantics and its new reference semantics is
* the same as its new value semantics. This pass returns a placement of nodes
* into ordered basic blocks, since the reference semantics of a function
* depends on the order of execution with respect to anti-dependencies. Clones
* are inserted sparingly when there are two write users of a single collection
* or if a read user cannot be scheduled before a write user.
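*
* For example (illustrative pseudo-IR, not exact Hercules syntax):
*
*   c  = zero-initialized array constant
*   w1 = write(c, [0], x)
*   w2 = write(c, [1], y)
*
* Under value semantics, `w1` and `w2` are two independent copies of `c`, each
* with one element changed. Under reference semantics, both writes would
* mutate the same underlying object, so one of the two writes must instead
* target a clone of `c` for the two interpretations to agree.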
*/
pub fn legalize_reference_semantics(
editor: &mut FunctionEditor,
def_use: &ImmutableDefUseMap,
reverse_postorder: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
fork_join_map: &HashMap<NodeID, NodeID>,
loops: &LoopTree,
objects: &CollectionObjects,
) -> Option<BasicBlocks> {
// Repeatedly try to place nodes into basic blocks. If clones are induced,
// re-try. Specifically, repeat the following procedure until no new clones:
//
// 1. Attempt to place nodes in basic blocks. If a node can't be placed due
// to anti-dependency edges, induce a clone on the read and go back to
// step 1.
// 2. Check for any write-induced clones. If there are any, go back to step
// 1.
//
// Since each analysis needs to be re-calculated in each iteration, this
// function just implements the body of the described loop. The re-try logic
// is found in pass.rs. When a re-try is needed, no basic block assignment
// is returned. When a re-try isn't needed (no new clones were found), a
// basic block assignment is returned.
let bbs = match basic_blocks(
editor.func(),
editor.func_id(),
def_use,
reverse_postorder,
control_subgraph,
dom,
loops,
fork_join_map,
objects,
) {
Ok(bbs) => bbs,
Err((obj, reader)) => {
todo!();
return None;
}
};
if materialize_clones(editor, typing, control_subgraph, dom, loops, objects, &bbs) {
None
} else {
Some(bbs)
}
}
/*
* Top level global code motion function. Assigns each data node to one of its
* immediate control use / user nodes, forming (unordered) basic blocks. Returns
* the control node / basic block each node is in. Takes in a partial
* partitioning that must be respected. Based on the schedule-early-schedule-
* late method from Cliff Click's PhD thesis. May fail if an anti-dependency
* edge can't be satisfied - in this case, a clone that has to be induced is
* returned instead.
*/
fn basic_blocks(
function: &Function,
func_id: FunctionID,
def_use: &ImmutableDefUseMap,
reverse_postorder: &Vec<NodeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
fork_join_map: &HashMap<NodeID, NodeID>,
objects: &CollectionObjects,
) -> Result<BasicBlocks, (NodeID, NodeID)> {
let mut bbs: Vec<Option<NodeID>> = vec![None; function.nodes.len()];
// Step 1: assign the basic block locations of all nodes that must be in a
// specific block. This includes control nodes as well as some special data
// nodes, such as phis.
for idx in 0..function.nodes.len() {
match function.nodes[idx] {
Node::Phi { control, data: _ } => bbs[idx] = Some(control),
Node::ThreadID {
control,
dimension: _,
} => bbs[idx] = Some(control),
Node::Reduce {
control,
init: _,
reduct: _,
} => bbs[idx] = Some(control),
Node::Call {
control,
function: _,
dynamic_constants: _,
args: _,
} => bbs[idx] = Some(control),
Node::Parameter { index: _ } => bbs[idx] = Some(NodeID::new(0)),
Node::Constant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
Node::DynamicConstant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
_ if function.nodes[idx].is_control() => bbs[idx] = Some(NodeID::new(idx)),
_ => {}
}
}
// Step 2: schedule early. Place nodes in the earliest position they could
// go - use worklist to iterate nodes.
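// Nodes whose uses aren't all placed yet are pushed to the back of the
// worklist; this terminates because every data cycle goes through a phi or
// reduce, and those were already placed in step 1.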
let mut schedule_early = bbs.clone();
let mut worklist = VecDeque::from(reverse_postorder.clone());
while let Some(id) = worklist.pop_front() {
if schedule_early[id.idx()].is_some() {
continue;
}
// For every use, check what block is its "schedule early" block. This
// node goes in the lowest block amongst those blocks.
let use_places: Option<Vec<NodeID>> = get_uses(&function.nodes[id.idx()])
.as_ref()
.into_iter()
.map(|id| *id)
.map(|id| schedule_early[id.idx()])
.collect();
if let Some(use_places) = use_places {
// If every use has been placed, we can place this node at the
// lowest place in the domtree that dominates all of the use places.
let lowest = dom.lowest_amongst(use_places.into_iter());
schedule_early[id.idx()] = Some(lowest);
} else {
// If not, then just push this node back on the worklist.
worklist.push_back(id);
}
}
// Step 3: find anti-dependence edges. An anti-dependence edge needs to be
// drawn between a collection reading node and a collection mutating node
// when the following conditions are true:
//
// 1: The reading and mutating nodes may involve the same collection.
// 2: The node producing the collection used by the reading node is in a
// schedule early block that dominates the schedule early block of the
// mutating node. The node producing the collection used by the reading
// node may be an originator of a collection, a phi or reduce, or a mutator,
// but not a forwarding read - forwarding reads are collapsed, and the
// bottom read is treated as reading from the transitive parent of the
// forwarding read(s).
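//
// For example (illustrative pseudo-IR, not exact Hercules syntax):
//
//   c = zero-initialized array constant
//   r = read(c, [i])      // terminating read of c
//   w = write(c, [j], x)  // mutation of c
//
// `r` and `w` may involve the same collection and the definition of `c`
// dominates `w`, so the anti-dependency edge (r, w) forces `r` to be ordered
// before `w`.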
let mut antideps = BTreeSet::new();
for id in reverse_postorder.iter() {
// If this node reads collections without forwarding them, find those
// inputs (its terminating reads).
let terminating_reads: BTreeSet<_> =
terminating_reads(function, func_id, *id, objects).collect();
if !terminating_reads.is_empty() {
// Walk forwarding reads to find anti-dependency roots.
let mut workset = terminating_reads.clone();
let mut roots = BTreeSet::new();
while let Some(pop) = workset.pop_first() {
let forwarded: BTreeSet<_> =
forwarding_reads(function, func_id, pop, objects).collect();
if forwarded.is_empty() {
roots.insert(pop);
} else {
workset.extend(forwarded);
}
}
// For each root, find mutating nodes dominated by the root that
// modify an object read on any input of the current node (the
// terminating read).
// TODO: make this less outrageously inefficient.
let func_objects = &objects[&func_id];
for root in roots.iter() {
let root_early = schedule_early[root.idx()].unwrap();
let mut root_block_iterated_users: BTreeSet<NodeID> = BTreeSet::new();
let mut workset = BTreeSet::new();
workset.insert(*root);
while let Some(pop) = workset.pop_first() {
let users = def_use.get_users(pop).into_iter().filter(|user| {
!function.nodes[user.idx()].is_phi()
&& !function.nodes[user.idx()].is_reduce()
&& schedule_early[user.idx()].unwrap() == root_early
});
workset.extend(users.clone());
root_block_iterated_users.extend(users);
}
let read_objs: BTreeSet<_> = terminating_reads
.iter()
.map(|read_use| func_objects.objects(*read_use).into_iter())
.flatten()
.map(|id| *id)
.collect();
for mutator in reverse_postorder.iter() {
let mutator_early = schedule_early[mutator.idx()].unwrap();
if dom.does_dom(root_early, mutator_early)
&& (root_early != mutator_early
|| root_block_iterated_users.contains(&mutator))
&& mutating_objects(function, func_id, *mutator, objects)
.any(|mutated| read_objs.contains(&mutated))
&& id != mutator
{
antideps.insert((*id, *mutator));
}
}
}
}
}
let mut antideps_uses = vec![vec![]; function.nodes.len()];
let mut antideps_users = vec![vec![]; function.nodes.len()];
for (reader, mutator) in antideps.iter() {
antideps_uses[mutator.idx()].push(*reader);
antideps_users[reader.idx()].push(*mutator);
}
// Step 4: schedule late and pick each node's final position. Since the late
// schedule of each node depends on the final positions of its users, these
// two steps must be fused. Compute each node's latest position, then use the
// control dependent + shallow loop heuristic to actually place it.
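// A data node may legally sit anywhere on the dominator chain between its
// late schedule (the LCA of where its users need it) and its early schedule;
// the loop-nest heuristic that walks up this chain is currently commented
// out below, so the late position is the one that gets used.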
let join_fork_map: HashMap<NodeID, NodeID> = fork_join_map
.into_iter()
.map(|(fork, join)| (*join, *fork))
.collect();
let mut worklist = VecDeque::from_iter(reverse_postorder.into_iter().map(|id| *id).rev());
while let Some(id) = worklist.pop_front() {
if bbs[id.idx()].is_some() {
continue;
}
// Calculate the least common ancestor of user blocks, a.k.a. the "late"
// schedule.
let calculate_lca = || -> Option<_> {
let mut lca = None;
// Helper to incrementally update the LCA.
let mut update_lca = |a| {
if let Some(acc) = lca {
lca = Some(dom.least_common_ancestor(acc, a));
} else {
lca = Some(a);
}
};
// For every user, consider where we need to be to directly dominate the
// user.
for user in def_use
.get_users(id)
.as_ref()
.into_iter()
.chain(antideps_users[id.idx()].iter())
.map(|id| *id)
{
if let Node::Phi { control, data } = &function.nodes[user.idx()] {
// For phis, we need to dominate the block jumping to the phi in
// the slot that corresponds to our use.
for (control, data) in
zip(get_uses(&function.nodes[control.idx()]).as_ref(), data)
{
if id == *data {
update_lca(*control);
}
}
} else if let Node::Reduce {
control,
init,
reduct,
} = &function.nodes[user.idx()]
{
// For reduces, we need to either dominate the block right
// before the fork if we're the init input, or we need to
// dominate the join if we're the reduct input.
if id == *init {
let before_fork = function.nodes[join_fork_map[control].idx()]
.try_fork()
.unwrap()
.0;
update_lca(before_fork);
} else {
assert_eq!(id, *reduct);
update_lca(*control);
}
} else {
// For everything else, we just need to dominate the user.
update_lca(bbs[user.idx()]?);
}
}
Some(lca)
};
// Check if all users have been placed. If one of them hasn't, then add
// this node back on to the worklist.
let Some(lca) = calculate_lca() else {
worklist.push_back(id);
continue;
};
// Look between the LCA and the schedule early location to place the
// node.
let schedule_early = schedule_early[id.idx()].unwrap();
let schedule_late = lca.unwrap_or(schedule_early);
let mut chain = dom
// If the node has no users, then it doesn't really matter where we
// place it - just place it at the early placement.
.chain(schedule_late, schedule_early);
if let Some(mut location) = chain.next() {
/*
while let Some(control_node) = chain.next() {
// If the next node further up the dominator tree is in a shallower
// loop nest or if we can get out of a reduce loop when we don't
// need to be in one, place this data node in a higher-up location.
let old_nest = loops
.header_of(location)
.map(|header| loops.nesting(header).unwrap());
let new_nest = loops
.header_of(control_node)
.map(|header| loops.nesting(header).unwrap());
let shallower_nest = if let (Some(old_nest), Some(new_nest)) = (old_nest, new_nest)
{
old_nest > new_nest
} else {
// If the new location isn't a loop, its nesting level should
// be considered "shallower" if the current location is in a
// loop.
old_nest.is_some()
};
// This will move all nodes that don't need to be in reduce loops
// outside of reduce loops. Nodes that do need to be in a reduce
// loop use the reduce node forming the loop, so the dominator chain
// will consist of one block, and this loop won't ever iterate.
let currently_at_join = function.nodes[location.idx()].is_join();
if shallower_nest || currently_at_join {
location = control_node;
}
}
*/
bbs[id.idx()] = Some(location);
} else {
// If there is no valid location for this node, then it's a reading
// node of a collection that can't be placed above a mutation that
// uses it via an anti-dependency edge. Thus, a clone needs to be induced.
todo!()
}
}
let bbs: Vec<_> = bbs.into_iter().map(Option::unwrap).collect();
// Step 5: determine the order of nodes inside each block. Use a worklist to
// add nodes to blocks in an order that obeys dependencies.
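// A node is emitted once all of its uses in the same block (including
// anti-dependency uses) have already been emitted; phis and reduces are
// emitted immediately, since their data inputs conceptually arrive along
// incoming control edges rather than from earlier in the same block.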
let mut order: Vec<Vec<NodeID>> = vec![vec![]; function.nodes.len()];
let mut worklist = VecDeque::from_iter(
reverse_postorder
.into_iter()
.filter(|id| !function.nodes[id.idx()].is_control()),
);
let mut visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
let mut no_change_iters = 0;
while no_change_iters <= worklist.len()
&& let Some(id) = worklist.pop_front()
{
let node = &function.nodes[id.idx()];
if node.is_phi()
|| node.is_reduce()
|| get_uses(node)
.as_ref()
.into_iter()
.chain(antideps_uses[id.idx()].iter())
.all(|u| {
function.nodes[u.idx()].is_control()
|| bbs[u.idx()] != bbs[id.idx()]
|| visited[u.idx()]
})
{
order[bbs[id.idx()].idx()].push(*id);
visited.set(id.idx(), true);
no_change_iters = 0;
} else {
worklist.push_back(id);
no_change_iters += 1;
}
}
if no_change_iters == 0 {
Ok((bbs, order))
} else {
// If the worklist exited without finishing, then there's at least one
// reading node of a collection that is in an anti-dependency + normal
// dependency use cycle with a mutating node. This cycle must be broken by
// inducing a clone.
todo!()
}
}
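/*
* Helper function to iterate over the collection inputs of `reader` that it
* reads without forwarding to its output: the `collect` input of a read whose
* result is not a collection, the `data` input of a write when that input is
* itself a collection, and call arguments whose corresponding parameter object
* is neither mutated nor returned by the callee.
*/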
fn terminating_reads<'a>(
function: &'a Function,
func_id: FunctionID,
reader: NodeID,
objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = NodeID> + 'a> {
match function.nodes[reader.idx()] {
Node::Read {
collect,
indices: _,
} if objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
Node::Write {
collect: _,
data,
indices: _,
} if !objects[&func_id].objects(data).is_empty() => Box::new(once(data)),
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
let objects = &objects[&callee];
let returns = objects.returned_objects();
let param_obj = objects.param_to_object(idx)?;
if !objects.is_mutated(param_obj) && !returns.contains(&param_obj) {
Some(*arg)
} else {
None
}
})),
_ => Box::new(empty()),
}
}
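/*
* Helper function to iterate over the collection inputs of `reader` that may be
* forwarded unmodified to its output: the `collect` input of a read whose
* result is itself a collection (a sub-collection read), both data inputs of a
* select that produces a collection, and call arguments whose corresponding
* parameter object is returned but not mutated by the callee.
*/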
fn forwarding_reads<'a>(
function: &'a Function,
func_id: FunctionID,
reader: NodeID,
objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = NodeID> + 'a> {
match function.nodes[reader.idx()] {
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(reader).is_empty() => Box::new(once(collect)),
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
} if !objects[&func_id].objects(reader).is_empty() => {
Box::new(once(second).chain(once(third)))
}
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => Box::new(args.into_iter().enumerate().filter_map(move |(idx, arg)| {
let objects = &objects[&callee];
let returns = objects.returned_objects();
let param_obj = objects.param_to_object(idx)?;
if !objects.is_mutated(param_obj) && returns.contains(&param_obj) {
Some(*arg)
} else {
None
}
})),
_ => Box::new(empty()),
}
}
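/*
* Helper function to iterate over the collection objects that `mutator` may
* mutate: the objects behind the `collect` input of a write, and the objects
* behind call arguments whose corresponding parameter is mutated by the callee.
*/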
fn mutating_objects<'a>(
function: &'a Function,
func_id: FunctionID,
mutator: NodeID,
objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = CollectionObjectID> + 'a> {
match function.nodes[mutator.idx()] {
Node::Write {
collect,
data: _,
indices: _,
} => Box::new(objects[&func_id].objects(collect).into_iter().map(|id| *id)),
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => Box::new(
args.into_iter()
.enumerate()
.filter_map(move |(idx, arg)| {
let callee_objects = &objects[&callee];
let param_obj = callee_objects.param_to_object(idx)?;
if callee_objects.is_mutated(param_obj) {
Some(objects[&func_id].objects(*arg).into_iter().map(|id| *id))
} else {
None
}
})
.flatten(),
),
_ => Box::new(empty()),
}
}
/*
* Top level function to materialize clones of collections. This transformation
* eliminates the possibility of multiple independent writes (including dynamic
* writes) to a single collection by introducing extra collection constants and
* inserting explicit clones. This allows us to make the simplifying assumption
* in the backend that collections have reference, rather than value, semantics.
* The pass calling this function is mandatory for correctness.
*/
fn materialize_clones(
editor: &mut FunctionEditor,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
objects: &CollectionObjects,
bbs: &BasicBlocks,
) -> bool {
let rev_po = control_subgraph.rev_po(NodeID::new(0));
let mut total_num_pts = 0;
let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
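// Lay out the dataflow lattice as one flat vector of program points: each
// basic block (a control node that maps to itself in `bbs.0`) gets one point
// at block entry plus one point after each instruction in the block.
// `bb_to_prefix_sum` maps a block to the index of its entry point.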
for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
if idx == bb.idx() {
bb_to_prefix_sum[idx] = total_num_pts;
total_num_pts += insts.len() + 1;
}
}
// Calculate two lattices - one that includes back edges, and one that
// doesn't. We want to handle simple clones before loop induced clones, so
// we first materialize clones based on the no-back-edges lattice, and then
// based on the full lattice.
let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
vec![BTreeMap::new(); total_num_pts];
used_collections_dataflow(
editor,
&mut no_back_edge_lattice,
&rev_po,
&bb_to_prefix_sum,
control_subgraph,
objects,
bbs,
);
let mut super_value = BTreeMap::new();
if find_clones(
editor,
&super_value,
&no_back_edge_lattice,
&rev_po,
&typing,
control_subgraph,
dom,
loops,
objects,
&bb_to_prefix_sum,
bbs,
) {
return true;
}
// After inducing simple clones, calculate the full lattice and materialize
// any loop induced clones.
let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
loop {
let changed = used_collections_dataflow(
editor,
&mut lattice,
&rev_po,
&bb_to_prefix_sum,
control_subgraph,
objects,
bbs,
);
if !changed {
break;
}
}
for value in lattice.iter() {
meet(&mut super_value, value);
}
find_clones(
editor,
&super_value,
&lattice,
&rev_po,
&typing,
control_subgraph,
dom,
loops,
objects,
&bb_to_prefix_sum,
bbs,
)
}
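/*
* Helper function to meet two used-collections lattice values: a pointwise
* union of the collection -> possible users maps. Top is the empty map (no
* collections used yet).
*/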
fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
for (used, users) in right.into_iter() {
left.entry(*used).or_default().extend(users.into_iter());
}
}
/*
* Helper function to run a single iteration of the used collections dataflow
* analysis. Returns whether the lattice was changed. The lattice maps each
* program point to a set of used values and their possible users. Top is that
* no nodes are used yet.
*/
fn used_collections_dataflow(
editor: &FunctionEditor,
lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
bb_to_prefix_sum: &Vec<usize>,
control_subgraph: &Subgraph,
objects: &CollectionObjects,
bbs: &BasicBlocks,
) -> bool {
// Run dataflow analysis to figure out which accesses to collections induce
// clones. This dataflow analysis depends on basic block assignments and is
// more analogous to standard dataflow analysis in CFG + SSA IRs. This is
// the only place this form is used, so just hardcode it here.
//
// This forward dataflow analysis tracks which collections are used at each
// program point, and by what user nodes. Collections are referred to using
// node IDs. Specifically:
//
// - Phi - a phi node adds its inputs to the used set and removes itself
// from the used set. If a phi uses an ID that is used along the edge of
// the corresponding predecessor, a clone is induced.
// - Select - a select node adds its inputs to the used set and removes
// itself from the used set. If either use is already used, a clone is
// induced.
// - Reduce - a reduce node adds its inputs to the used set and removes
// itself from the used set. If the `init` input is already used, a clone
// is induced. If the `reduct` input is used at the end of the basic block
// containing the reduce, then a clone is induced. At the end of the basic
// block, the reduce removes itself from the used set.
// - Read - a read node that reads a sub-collection from a collection,
// rather than reading a primitive type, adds its input to the used set
// and removes itself from the used set. If the `collect` input is already
// used, a clone is induced.
// - Write - a write node adds its `collect` input to the used set and
// removes itself from the used set. If the `collect` input is already
// used, a clone is induced.
// - Call - a call node adds any mutated input or input that may be returned
// to the used set and removes itself from the used set. If any mutated
// input is already used, a clone is induced.
//
// Reads of sub-collections (select, some read, and some call nodes) use a
// collection because they may have downstream writes that depend on the new
// "sub-view" of the same collection. This does not include reads that "end"
// (most reads, some calls, the `data` input of a write). This analysis does
// not consider parallel mutations in fork-joins.
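//
// As a small illustration, for a block whose only instruction is a write `w`
// with `collect` input `c`, the two program points hold:
//
//   point 0 (block entry): the meet of the predecessors' exit values
//   point 1 (after w):     the entry value, plus `c` mapped to {w}, minus `w`
//
// A later node that also uses `c` then sees `c` already used and induces a
// clone (see `find_clones`).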
let nodes = &editor.func().nodes;
let func_id = editor.func_id();
let mut changed = false;
for bb in rev_po.iter() {
// The lattice value of the first point is the meet of the
// predecessors' terminating lattice values.
let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
let mut new_top_value = BTreeMap::new();
// Clearing `top_value` is not necessary since used nodes are never
// removed from lattice values, only added.
for pred in control_subgraph.preds(*bb) {
let last_pt = bbs.1[pred.idx()].len();
meet(
&mut new_top_value,
&lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
);
}
changed |= *old_top_value != new_top_value;
lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
// The lattice values of the following points are determined by their
// immediately preceding instructions.
let insts = &bbs.1[bb.idx()];
for (prev_pt, inst) in insts.iter().enumerate() {
let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
let mut new_value = prev_value.clone();
match nodes[inst.idx()] {
Node::Phi {
control: _,
ref data,
} if !objects[&func_id].objects(*inst).is_empty() => {
for elem in data {
new_value.entry(*elem).or_default().insert(*inst);
}
new_value.remove(inst);
}
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
}
| Node::Reduce {
control: _,
init: second,
reduct: third,
} => {
if !objects[&func_id].objects(*inst).is_empty() {
new_value.entry(second).or_default().insert(*inst);
new_value.entry(third).or_default().insert(*inst);
new_value.remove(inst);
}
}
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Write {
collect,
data: _,
indices: _,
} => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => {
let callee_objects = &objects[&callee];
for (param_idx, arg) in args.into_iter().enumerate() {
if callee_objects
.param_to_object(param_idx)
.map(|object| {
callee_objects.is_mutated(object)
|| callee_objects.returned_objects().contains(&object)
})
.unwrap_or(false)
{
new_value.entry(*arg).or_default().insert(*inst);
}
}
new_value.remove(inst);
}
_ => {}
}
changed |= *old_value != new_value;
lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
}
// Handle reduces in this block specially at the very end.
let last_pt = insts.len();
let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
let mut new_bottom_value = old_bottom_value.clone();
for inst in insts.iter() {
if let Node::Reduce {
control: _,
init: _,
reduct,
} = nodes[inst.idx()]
{
assert!(
new_bottom_value.contains_key(&reduct),
"PANIC: Can't handle clones inside a reduction cycle currently."
);
new_bottom_value.remove(inst);
}
}
changed |= *old_bottom_value != new_bottom_value;
lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
}
changed
}
/*
* Helper function to induce a clone once an object with multiple users has been
* found.
*/
fn induce_clone(
editor: &mut FunctionEditor,
object: NodeID,
user: NodeID,
value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
bb_to_prefix_sum: &Vec<usize>,
bbs: &BasicBlocks,
) {
// If `user` already used `object` and tries to use it again, then the
// clone is a "loop induced" clone. Otherwise, it's a simple clone.
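// In both cases the clone itself is expressed as a fresh zero constant of
// `object`'s type plus a write (with no indices) of the whole old object into
// that constant; the relevant users are then redirected to that write.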
if !value[&object].contains(&user) {
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Create the clone into the new constant collection.
let clone_node = edit.add_node(Node::Write {
collect: cons_node,
data: object,
indices: vec![].into_boxed_slice(),
});
// Make user use the cloned object.
edit.replace_all_uses_where(object, clone_node, |id| *id == user)
});
assert!(success);
} else {
// Figure out where to place the phi for this loop induced clone. This
// is the deepest loop header where `user` is responsible for making
// `object` used at the top of the block, and the block dominates the block
// containing `user`. If `user` is a phi, then the region it's
// attached to is excluded from eligibility.
let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
lattice[bb_to_prefix_sum[bb.idx()]]
.get(&object)
.unwrap_or(&BTreeSet::new())
.contains(&user)
&& dom.does_dom(*bb, bbs.0[user.idx()])
&& loops.contains(*bb)
&& loops.is_in_loop(*bb, bbs.0[user.idx()])
&& (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
});
let top_block = eligible_blocks
.max_by_key(|bb| loops.nesting(*bb).unwrap())
.unwrap();
assert!(editor.func().nodes[top_block.idx()].is_region());
// Figure out the users of `object` that we need to phi back
// upwards. Assign each user a number indicating how far down the
// user chain it is, higher is farther down. This is used for
// picking the most downstream user later.
let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
let mut workset: BTreeSet<NodeID> = BTreeSet::new();
workset.insert(object);
let mut chain_ordering = 1;
assert!(!super_value.is_empty());
while let Some(pop) = workset.pop_first() {
let iterated_users: BTreeSet<_> = super_value
.get(&pop)
.map(|users| users.into_iter())
.into_iter()
.flatten()
.map(|id| *id)
.filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
.collect();
workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
for user in iterated_users {
*users.entry(user).or_default() = chain_ordering;
chain_ordering += 1;
}
}
// The fringe users may not dominate any predecessors of the loop
// header. The following is some Juno code that exposes this:
//
// fn problematic(a : size) -> i32 {
// for i = 0 to a {
// let arr : i32[1];
// for j = 0 to a {
// arr[0] = 1;
// }
// }
// return 0;
// }
//
// Note that `arr` induces a clone each iteration, since its value
// needs to be reset to all zeros. However, it should also be noted
// that the most fringe user of `arr`, the write inside the inner
// loop, does not dominate the bottom of the outer loop. Thus, we
// need to insert a phi in the bottom block of the outer loop to
// retrieve either the write, or `arr` before the inner loop. The
// general version of this problem requires the following solution.
// Our goal is to figure out which downstream user represents
// `object` at each block in the loop. We first assign each block
// containing a user the most downstream user it contains. Then, we
// create a dummy phi for every region (including the header) in the
// loop, which is the downstream user for that block. Then, every
// other block is assigned the downstream user of its single
// predecessor. This basically amounts to recreating SSA for
// `object` inside the loop.
let mut user_per_loop_bb = BTreeMap::new();
let mut added_phis = BTreeMap::new();
let mut top_phi = NodeID::new(0);
// Assign existing users.
for (user, ordering) in users.iter() {
let bb = bbs.0[user.idx()];
if let Some(old_user) = user_per_loop_bb.get(&bb)
&& users[old_user] > *ordering
{
} else {
user_per_loop_bb.insert(bb, *user);
}
}
// Assign dummy phis.
for bb in loops.nodes_in_loop(top_block) {
if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
&& editor.func().nodes[bb.idx()].is_region()
{
let success = editor.edit(|mut edit| {
let phi_node = edit.add_node(Node::Phi {
control: bb,
data: empty().collect(),
});
if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
user_per_loop_bb.insert(bb, phi_node);
}
if bb == top_block {
top_phi = phi_node;
}
added_phis.insert(phi_node, bb);
Ok(edit)
});
assert!(success);
}
}
// Assign users for the rest of the blocks.
for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
if !user_per_loop_bb.contains_key(&bb) {
assert!(control_subgraph.preds(*bb).count() == 1);
user_per_loop_bb.insert(
*bb,
user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
);
}
}
// Induce the clone.
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Create the phis.
let mut phi_map = BTreeMap::new();
let mut real_phis = BTreeSet::new();
for (dummy, bb) in added_phis {
let real = edit.add_node(Node::Phi {
control: bb,
data: control_subgraph
.preds(bb)
.map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
.collect(),
});
phi_map.insert(dummy, real);
real_phis.insert(real);
}
// Create the clone into the phi.
let real_top_phi = phi_map[&top_phi];
let clone_node = edit.add_node(Node::Write {
collect: real_top_phi,
data: object,
indices: vec![].into_boxed_slice(),
});
// Make users use the cloned object.
edit = edit.replace_all_uses_where(object, clone_node, |id| {
id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
})?;
// Get rid of the dummy phis.
for (dummy, real) in phi_map {
edit = edit.replace_all_uses(dummy, real)?;
edit = edit.delete_node(dummy)?;
}
// Make phis use the clone instead of the top phi.
edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
});
assert!(success);
// De-duplicate phis.
gvn(editor, false);
// Get rid of unused phis.
dce(editor);
// Simplify phis.
phi_elim(editor);
}
}
/*
* Helper function to analyze lattice values at each program point and find
* multiple dynamic users of a single collection. Returns as soon as any clone
* is found.
*/
fn find_clones(
editor: &mut FunctionEditor,
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
objects: &CollectionObjects,
bb_to_prefix_sum: &Vec<usize>,
bbs: &BasicBlocks,
) -> bool {
let nodes = &editor.func().nodes;
let func_id = editor.func_id();
for bb in rev_po.iter().rev() {
let insts = &bbs.1[bb.idx()];
// Accumulate predecessor bottom used sets for phis. Phis are special in
// that they need to be path sensitive, but multiple phis in a single
// block may use a single collection, and that needs to induce a clone.
let mut phi_acc_bottoms = BTreeMap::new();
for (prev_pt, inst) in insts.iter().enumerate() {
let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
match nodes[inst.idx()] {
Node::Phi {
control: _,
ref data,
} => {
// In phis, check if an argument is already used in the
// predecessor's bottom lattice value (phis need to be path-
// sensitive).
for (pred, arg) in zip(control_subgraph.preds(*bb), data) {
let bottom = phi_acc_bottoms.entry(pred).or_insert_with(|| {
let last_pt = bbs.1[pred.idx()].len();
let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt];
bottom.clone()
});
if bottom.contains_key(&arg) {
induce_clone(
editor,
*arg,
*inst,
bottom,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
} else {
// Subsequent phis using `arg` along the same
// predecessor induce a clone.
bottom.insert(*arg, once(*inst).collect());
}
}
}
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
} => {
if value.contains_key(&second) {
induce_clone(
editor,
second,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
if value.contains_key(&third) {
induce_clone(
editor,
third,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
Node::Reduce {
control: _,
init,
reduct: _,
} => {
if value.contains_key(&init) {
induce_clone(
editor,
init,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
if value.contains_key(&collect) {
induce_clone(
editor,
collect,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
Node::Write {
collect,
data: _,
indices: _,
} => {
if value.contains_key(&collect) {
induce_clone(
editor,
collect,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => {
let callee_objects = &objects[&callee];
for (param_idx, arg) in args.into_iter().enumerate() {
if callee_objects
.param_to_object(param_idx)
.map(|object| {
callee_objects.is_mutated(object)
|| callee_objects.returned_objects().contains(&object)
})
.unwrap_or(false)
&& value.contains_key(arg)
{
induce_clone(
editor,
*arg,
*inst,
value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
}
_ => {}
}
}
}
false
}