Skip to content
Snippets Groups Projects

Handle loop vs. simple induced clones better

Merged rarbore2 requested to merge clone_detection7 into main
1 file
+ 528
331
Compare changes
  • Side-by-side
  • Inline
@@ -540,10 +540,108 @@ fn materialize_clones(
@@ -540,10 +540,108 @@ fn materialize_clones(
objects: &CollectionObjects,
objects: &CollectionObjects,
bbs: &BasicBlocks,
bbs: &BasicBlocks,
) -> bool {
) -> bool {
// First, run dataflow analysis to figure out which access to collections
let rev_po = control_subgraph.rev_po(NodeID::new(0));
// induce clones. This dataflow analysis depends on basic block assignments
let mut total_num_pts = 0;
// and is more analogous to standard dataflow analysis in CFG + SSA IRs.
let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
// This is the only place this form is used, so just hardcode it here.
for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
 
if idx == bb.idx() {
 
bb_to_prefix_sum[idx] = total_num_pts;
 
total_num_pts += insts.len() + 1;
 
}
 
}
 
 
// Calculate two lattices - one that includes back edges, and one that
 
// doesn't. We want to handle simple clones before loop induced clones, so
 
// we first materialize clones based on the no-back-edges lattice, and hten
 
// based on the full lattice.
 
let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
 
vec![BTreeMap::new(); total_num_pts];
 
used_collections_dataflow(
 
editor,
 
&mut no_back_edge_lattice,
 
&rev_po,
 
&bb_to_prefix_sum,
 
control_subgraph,
 
objects,
 
bbs,
 
);
 
let mut super_value = BTreeMap::new();
 
if find_clones(
 
editor,
 
&super_value,
 
&no_back_edge_lattice,
 
&rev_po,
 
&typing,
 
control_subgraph,
 
dom,
 
loops,
 
objects,
 
&bb_to_prefix_sum,
 
bbs,
 
) {
 
return true;
 
}
 
 
// After inducing simple clones, calculate the full lattice and materialize
 
// any loop induced clones.
 
let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
 
loop {
 
let changed = used_collections_dataflow(
 
editor,
 
&mut lattice,
 
&rev_po,
 
&bb_to_prefix_sum,
 
control_subgraph,
 
objects,
 
bbs,
 
);
 
if !changed {
 
break;
 
}
 
}
 
for value in lattice.iter() {
 
meet(&mut super_value, value);
 
}
 
find_clones(
 
editor,
 
&super_value,
 
&lattice,
 
&rev_po,
 
&typing,
 
control_subgraph,
 
dom,
 
loops,
 
objects,
 
&bb_to_prefix_sum,
 
bbs,
 
)
 
}
 
 
fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
 
for (used, users) in right.into_iter() {
 
left.entry(*used).or_default().extend(users.into_iter());
 
}
 
}
 
 
/*
 
* Helper function to run a single iteration of the used collections dataflow
 
* analysis. Returns whether the lattice was changed. The lattice maps each
 
* program point to a set of used values and their possible users. Top is that
 
* no nodes are used yet.
 
*/
 
fn used_collections_dataflow(
 
editor: &FunctionEditor,
 
lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
 
rev_po: &Vec<NodeID>,
 
bb_to_prefix_sum: &Vec<usize>,
 
control_subgraph: &Subgraph,
 
objects: &CollectionObjects,
 
bbs: &BasicBlocks,
 
) -> bool {
 
// Run dataflow analysis to figure out which accesses to collections induce
 
// clones. This dataflow analysis depends on basic block assignments and is
 
// more analogous to standard dataflow analysis in CFG + SSA IRs. This is
 
// the only place this form is used, so just hardcode it here.
//
//
// This forward dataflow analysis tracks which collections are used at each
// This forward dataflow analysis tracks which collections are used at each
// program point, and by what user nodes. Collections are referred to using
// program point, and by what user nodes. Collections are referred to using
@@ -576,363 +674,364 @@ fn materialize_clones(
@@ -576,363 +674,364 @@ fn materialize_clones(
// "sub-view" of the same collection. This does not include reads that "end"
// "sub-view" of the same collection. This does not include reads that "end"
// (most reads, some calls, the `data` input of a write). This analysis does
// (most reads, some calls, the `data` input of a write). This analysis does
// not consider parallel mutations in fork-joins.
// not consider parallel mutations in fork-joins.
let rev_po = control_subgraph.rev_po(NodeID::new(0));
let mut total_num_pts = 0;
let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
if idx == bb.idx() {
bb_to_prefix_sum[idx] = total_num_pts;
total_num_pts += insts.len() + 1;
}
}
// Lattice maps each program point to a set of used values and their
// possible users. Top is that no nodes are used yet.
let nodes = &editor.func().nodes;
let nodes = &editor.func().nodes;
let func_id = editor.func_id();
let func_id = editor.func_id();
let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>,
let mut changed = false;
right: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
for (used, users) in right.into_iter() {
left.entry(*used).or_default().extend(users.into_iter());
}
};
let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
loop {
let mut changed = false;
for bb in rev_po.iter() {
for bb in rev_po.iter() {
// The lattice value of the first point is the meet of the
// The lattice value of the first point is the meet of the
// predecessor terminating lattice values.
// predecessor terminating lattice values.
let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
let mut new_top_value = BTreeMap::new();
let mut new_top_value = BTreeMap::new();
// Clearing `top_value` is not necessary since used nodes are never
// Clearing `top_value` is not necessary since used nodes are never
// removed from lattice values, only added.
// removed from lattice values, only added.
for pred in control_subgraph.preds(*bb) {
for pred in control_subgraph.preds(*bb) {
let last_pt = bbs.1[pred.idx()].len();
let last_pt = bbs.1[pred.idx()].len();
meet(
meet(
&mut new_top_value,
&mut new_top_value,
&lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
&lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
);
);
}
}
changed |= *old_top_value != new_top_value;
changed |= *old_top_value != new_top_value;
lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
// The lattice value of following points are determined by their
// The lattice value of following points are determined by their
// immediate preceding instructions.
// immediate preceding instructions.
let insts = &bbs.1[bb.idx()];
let insts = &bbs.1[bb.idx()];
for (prev_pt, inst) in insts.iter().enumerate() {
for (prev_pt, inst) in insts.iter().enumerate() {
let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
let mut new_value = prev_value.clone();
let mut new_value = prev_value.clone();
match nodes[inst.idx()] {
match nodes[inst.idx()] {
Node::Phi {
Node::Phi {
control: _,
control: _,
ref data,
ref data,
} if !objects[&func_id].objects(*inst).is_empty() => {
} if !objects[&func_id].objects(*inst).is_empty() => {
for elem in data {
for elem in data {
new_value.entry(*elem).or_default().insert(*inst);
new_value.entry(*elem).or_default().insert(*inst);
}
new_value.remove(inst);
}
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
}
| Node::Reduce {
control: _,
init: second,
reduct: third,
} => {
if !objects[&func_id].objects(*inst).is_empty() {
new_value.entry(second).or_default().insert(*inst);
new_value.entry(third).or_default().insert(*inst);
new_value.remove(inst);
}
}
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Write {
collect,
data: _,
indices: _,
} => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
}
Node::Call {
new_value.remove(inst);
control: _,
}
function: callee,
Node::Ternary {
dynamic_constants: _,
op: TernaryOperator::Select,
ref args,
first: _,
} => {
second,
let callee_objects = &objects[&callee];
third,
for (param_idx, arg) in args.into_iter().enumerate() {
}
if callee_objects
| Node::Reduce {
.param_to_object(param_idx)
control: _,
.map(|object| {
init: second,
callee_objects.is_mutated(object)
reduct: third,
|| callee_objects.returned_objects().contains(&object)
} => {
})
if !objects[&func_id].objects(*inst).is_empty() {
.unwrap_or(false)
new_value.entry(second).or_default().insert(*inst);
{
new_value.entry(third).or_default().insert(*inst);
new_value.entry(*arg).or_default().insert(*inst);
}
}
new_value.remove(inst);
new_value.remove(inst);
}
}
_ => {}
}
}
changed |= *old_value != new_value;
Node::Read {
lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
collect,
}
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
// Handle reduces in this block specially at the very end.
new_value.entry(collect).or_default().insert(*inst);
let last_pt = insts.len();
new_value.remove(inst);
let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
}
let mut new_bottom_value = old_bottom_value.clone();
Node::Write {
for inst in insts.iter() {
collect,
if let Node::Reduce {
data: _,
 
indices: _,
 
} => {
 
new_value.entry(collect).or_default().insert(*inst);
 
new_value.remove(inst);
 
}
 
Node::Call {
control: _,
control: _,
init: _,
function: callee,
reduct,
dynamic_constants: _,
} = nodes[inst.idx()]
ref args,
{
} => {
assert!(
let callee_objects = &objects[&callee];
new_bottom_value.contains_key(&reduct),
for (param_idx, arg) in args.into_iter().enumerate() {
"PANIC: Can't handle clones inside a reduction cycle currently."
if callee_objects
);
.param_to_object(param_idx)
new_bottom_value.remove(inst);
.map(|object| {
 
callee_objects.is_mutated(object)
 
|| callee_objects.returned_objects().contains(&object)
 
})
 
.unwrap_or(false)
 
{
 
new_value.entry(*arg).or_default().insert(*inst);
 
}
 
}
 
new_value.remove(inst);
}
}
 
_ => {}
}
}
changed |= *old_bottom_value != new_bottom_value;
changed |= *old_value != new_value;
lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
}
}
if !changed {
// Handle reduces in this block specially at the very end.
break;
let last_pt = insts.len();
 
let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
 
let mut new_bottom_value = old_bottom_value.clone();
 
for inst in insts.iter() {
 
if let Node::Reduce {
 
control: _,
 
init: _,
 
reduct,
 
} = nodes[inst.idx()]
 
{
 
assert!(
 
new_bottom_value.contains_key(&reduct),
 
"PANIC: Can't handle clones inside a reduction cycle currently."
 
);
 
new_bottom_value.remove(inst);
 
}
}
}
}
changed |= *old_bottom_value != new_bottom_value;
let mut super_value = BTreeMap::new();
lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
for value in lattice.iter() {
meet(&mut super_value, value);
}
}
// Helper to induce a clone when an implicit clone is identified.
changed
let nodes = nodes.clone();
}
let mut induce_clone = |object: NodeID,
user: NodeID,
value: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
// If `user` already used `object` and tries to use it again, then the
// clone is a "loop induced" clone. Otherwise, it's a simple clone.
if !value[&object].contains(&user) {
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Create the clone into the new constant collection.
/*
let clone_node = edit.add_node(Node::Write {
* Helper function to induce a clone once an object with multiple users has been
collect: cons_node,
* found.
data: object,
*/
indices: vec![].into_boxed_slice(),
fn induce_clone(
});
editor: &mut FunctionEditor,
 
object: NodeID,
 
user: NodeID,
 
value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
 
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
 
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
 
rev_po: &Vec<NodeID>,
 
typing: &Vec<TypeID>,
 
control_subgraph: &Subgraph,
 
dom: &DomTree,
 
loops: &LoopTree,
 
bb_to_prefix_sum: &Vec<usize>,
 
bbs: &BasicBlocks,
 
) {
 
// If `user` already used `object` and tries to use it again, then the
 
// clone is a "loop induced" clone. Otherwise, it's a simple clone.
 
if !value[&object].contains(&user) {
 
let success = editor.edit(|mut edit| {
 
// Create the constant collection object for allocation.
 
let object_ty = typing[object.idx()];
 
let object_cons = edit.add_zero_constant(object_ty);
 
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Make user use the cloned object.
// Create the clone into the new constant collection.
edit.replace_all_uses_where(object, clone_node, |id| *id == user)
let clone_node = edit.add_node(Node::Write {
});
collect: cons_node,
assert!(success);
data: object,
} else {
indices: vec![].into_boxed_slice(),
// Figure out where to place that phi. This is the deepest
// loop header where `user` is responsible for making `object` used
// used at the top of the block, and the block dominates the block
// containing `user`. If `user` is a phi, then the region it's
// attached to is excluded from eligibility.
let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
lattice[bb_to_prefix_sum[bb.idx()]]
.get(&object)
.unwrap_or(&BTreeSet::new())
.contains(&user)
&& dom.does_dom(*bb, bbs.0[user.idx()])
&& loops.contains(*bb)
&& loops.is_in_loop(*bb, bbs.0[user.idx()])
&& (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
});
});
let top_block = eligible_blocks
.max_by_key(|bb| loops.nesting(*bb).unwrap())
.unwrap();
assert!(editor.func().nodes[top_block.idx()].is_region());
// Figure out the users of `object` that we need to phi back
// Make user use the cloned object.
// upwards. Assign each user a number indicating how far down the
edit.replace_all_uses_where(object, clone_node, |id| *id == user)
// user chain it is, higher is farther down. This is used for
});
// picking the most downstream user later.
assert!(success);
let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
} else {
let mut workset: BTreeSet<NodeID> = BTreeSet::new();
// Figure out where to place that phi. This is the deepest
workset.insert(object);
// loop header where `user` is responsible for making `object` used
let mut chain_ordering = 1;
// used at the top of the block, and the block dominates the block
while let Some(pop) = workset.pop_first() {
// containing `user`. If `user` is a phi, then the region it's
let iterated_users: BTreeSet<_> = super_value
// attached to is excluded from eligibility.
.get(&pop)
let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
.map(|users| users.into_iter())
lattice[bb_to_prefix_sum[bb.idx()]]
.into_iter()
.get(&object)
.flatten()
.unwrap_or(&BTreeSet::new())
.map(|id| *id)
.contains(&user)
.filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
&& dom.does_dom(*bb, bbs.0[user.idx()])
.collect();
&& loops.contains(*bb)
workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
&& loops.is_in_loop(*bb, bbs.0[user.idx()])
for user in iterated_users {
&& (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
*users.entry(user).or_default() = chain_ordering;
});
chain_ordering += 1;
let top_block = eligible_blocks
}
.max_by_key(|bb| loops.nesting(*bb).unwrap())
 
.unwrap();
 
assert!(editor.func().nodes[top_block.idx()].is_region());
 
 
// Figure out the users of `object` that we need to phi back
 
// upwards. Assign each user a number indicating how far down the
 
// user chain it is, higher is farther down. This is used for
 
// picking the most downstream user later.
 
let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
 
let mut workset: BTreeSet<NodeID> = BTreeSet::new();
 
workset.insert(object);
 
let mut chain_ordering = 1;
 
assert!(!super_value.is_empty());
 
while let Some(pop) = workset.pop_first() {
 
let iterated_users: BTreeSet<_> = super_value
 
.get(&pop)
 
.map(|users| users.into_iter())
 
.into_iter()
 
.flatten()
 
.map(|id| *id)
 
.filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
 
.collect();
 
workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
 
for user in iterated_users {
 
*users.entry(user).or_default() = chain_ordering;
 
chain_ordering += 1;
}
}
 
}
// The fringe users may not dominate any predecessors of the loop
// The fringe users may not dominate any predecessors of the loop
// header. The following is some Juno code that exposes this:
// header. The following is some Juno code that exposes this:
//
//
// fn problematic(a : size) -> i32 {
// fn problematic(a : size) -> i32 {
// for i = 0 to a {
// for i = 0 to a {
// let arr : i32[1];
// let arr : i32[1];
// for j = 0 to a {
// for j = 0 to a {
// arr[0] = 1;
// arr[0] = 1;
// }
// }
// }
// }
// return 0;
// return 0;
// }
// }
//
//
// Note that `arr` induces a clone each iteration, since its value
// Note that `arr` induces a clone each iteration, since its value
// needs to be reset to all zeros. However, it should also be noted
// needs to be reset to all zeros. However, it should also be noted
// that the most fringe user of `arr`, the write inside the inner
// that the most fringe user of `arr`, the write inside the inner
// loop, does not dominate the bottom of the outer loop. Thus, we
// loop, does not dominate the bottom of the outer loop. Thus, we
// need to insert a phi in the bottom block of the outer loop to
// need to insert a phi in the bottom block of the outer loop to
// retrieve either the write, or `arr` before the inner loop. The
// retrieve either the write, or `arr` before the inner loop. The
// general version of this problem requires the following solution.
// general version of this problem requires the following solution.
// Our goal is to figure out which downstream user represents
// Our goal is to figure out which downstream user represents
// `object` at each block in the loop. We first assign each block
// `object` at each block in the loop. We first assign each block
// containing a user the most downstream user it contains. Then, we
// containing a user the most downstream user it contains. Then, we
// create a dummy phi for every region (including the header) in the
// create a dummy phi for every region (including the header) in the
// loop, which is the downstream user for that block. Then, every
// loop, which is the downstream user for that block. Then, every
// other block is assigned the downstream user of its single
// other block is assigned the downstream user of its single
// predecessor. This basically amounts to recreating SSA for
// predecessor. This basically amounts to recreating SSA for
// `object` inside the loop.
// `object` inside the loop.
let mut user_per_loop_bb = BTreeMap::new();
let mut user_per_loop_bb = BTreeMap::new();
let mut added_phis = BTreeMap::new();
let mut added_phis = BTreeMap::new();
let mut top_phi = NodeID::new(0);
let mut top_phi = NodeID::new(0);
// Assign existing users.
// Assign existing users.
for (user, ordering) in users.iter() {
for (user, ordering) in users.iter() {
let bb = bbs.0[user.idx()];
let bb = bbs.0[user.idx()];
if let Some(old_user) = user_per_loop_bb.get(&bb)
if let Some(old_user) = user_per_loop_bb.get(&bb)
&& users[old_user] > *ordering
&& users[old_user] > *ordering
{
{
} else {
} else {
user_per_loop_bb.insert(bb, *user);
user_per_loop_bb.insert(bb, *user);
}
}
}
// Assign dummy phis.
}
for bb in loops.nodes_in_loop(top_block) {
// Assign dummy phis.
if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
for bb in loops.nodes_in_loop(top_block) {
&& editor.func().nodes[bb.idx()].is_region()
if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
{
&& editor.func().nodes[bb.idx()].is_region()
let success = editor.edit(|mut edit| {
{
let phi_node = edit.add_node(Node::Phi {
let success = editor.edit(|mut edit| {
control: bb,
let phi_node = edit.add_node(Node::Phi {
data: empty().collect(),
control: bb,
});
data: empty().collect(),
if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
user_per_loop_bb.insert(bb, phi_node);
}
if bb == top_block {
top_phi = phi_node;
}
added_phis.insert(phi_node, bb);
Ok(edit)
});
});
assert!(success);
if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
}
user_per_loop_bb.insert(bb, phi_node);
 
}
 
if bb == top_block {
 
top_phi = phi_node;
 
}
 
added_phis.insert(phi_node, bb);
 
Ok(edit)
 
});
 
assert!(success);
}
}
// Assign users for the rest of the blocks.
}
for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
// Assign users for the rest of the blocks.
if !user_per_loop_bb.contains_key(&bb) {
for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
assert!(control_subgraph.preds(*bb).count() == 1);
if !user_per_loop_bb.contains_key(&bb) {
user_per_loop_bb.insert(
assert!(control_subgraph.preds(*bb).count() == 1);
*bb,
user_per_loop_bb.insert(
user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
*bb,
);
user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
}
);
}
}
 
}
// Induce the clone.
// Induce the clone.
let success = editor.edit(|mut edit| {
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Create the phis.
// Create the phis.
let mut phi_map = BTreeMap::new();
let mut phi_map = BTreeMap::new();
let mut real_phis = BTreeSet::new();
let mut real_phis = BTreeSet::new();
for (dummy, bb) in added_phis {
for (dummy, bb) in added_phis {
let real = edit.add_node(Node::Phi {
let real = edit.add_node(Node::Phi {
control: bb,
control: bb,
data: control_subgraph
data: control_subgraph
.preds(bb)
.preds(bb)
.map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
.map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
.collect(),
.collect(),
});
phi_map.insert(dummy, real);
real_phis.insert(real);
}
// Create the clone into the phi.
let real_top_phi = phi_map[&top_phi];
let clone_node = edit.add_node(Node::Write {
collect: real_top_phi,
data: object,
indices: vec![].into_boxed_slice(),
});
});
 
phi_map.insert(dummy, real);
 
real_phis.insert(real);
 
}
// Make users use the cloned object.
// Create the clone into the phi.
edit = edit.replace_all_uses_where(object, clone_node, |id| {
let real_top_phi = phi_map[&top_phi];
id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
let clone_node = edit.add_node(Node::Write {
})?;
collect: real_top_phi,
 
data: object,
 
indices: vec![].into_boxed_slice(),
 
});
// Get rid of the dummy phis.
// Make users use the cloned object.
for (dummy, real) in phi_map {
edit = edit.replace_all_uses_where(object, clone_node, |id| {
edit = edit.replace_all_uses(dummy, real)?;
id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
edit = edit.delete_node(dummy)?;
})?;
}
// Make phis use the clone instead of the top phi.
// Get rid of the dummy phis.
edit =
for (dummy, real) in phi_map {
edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?;
edit = edit.replace_all_uses(dummy, real)?;
 
edit = edit.delete_node(dummy)?;
 
}
Ok(edit)
// Make phis use the clone instead of the top phi.
});
edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
assert!(success);
});
 
assert!(success);
// De-duplicate phis.
// De-duplicate phis.
gvn(editor, false);
gvn(editor, false);
// Get rid of unused phis.
// Get rid of unused phis.
dce(editor);
dce(editor);
// Simplify phis.
// Simplify phis.
phi_elim(editor);
phi_elim(editor);
}
}
};
}
// Now that we've computed the used collections dataflow analysis, use the
/*
// results to materialize a clone whenever a node attempts to use an already
* Helper function to analyze lattice values at each program point and find
// used node. As soon as any clone is found, return since that clone needs
* multiple dynamic users of a single write. Return as soon as any clone is
// to get placed before other clones can be discovered. Traverse blocks in
* found.
// postorder so that clones inside loops are discovered before loop-induced
*/
// clones.
fn find_clones(
 
editor: &mut FunctionEditor,
 
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
 
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
 
rev_po: &Vec<NodeID>,
 
typing: &Vec<TypeID>,
 
control_subgraph: &Subgraph,
 
dom: &DomTree,
 
loops: &LoopTree,
 
objects: &CollectionObjects,
 
bb_to_prefix_sum: &Vec<usize>,
 
bbs: &BasicBlocks,
 
) -> bool {
 
let nodes = &editor.func().nodes;
 
let func_id = editor.func_id();
for bb in rev_po.iter().rev() {
for bb in rev_po.iter().rev() {
let insts = &bbs.1[bb.idx()];
let insts = &bbs.1[bb.idx()];
// Accumulate predecessor bottom used sets for phis. Phis are special in
// Accumulate predecessor bottom used sets for phis. Phis are special in
@@ -956,7 +1055,21 @@ fn materialize_clones(
@@ -956,7 +1055,21 @@ fn materialize_clones(
bottom.clone()
bottom.clone()
});
});
if bottom.contains_key(&arg) {
if bottom.contains_key(&arg) {
induce_clone(*arg, *inst, bottom);
induce_clone(
 
editor,
 
*arg,
 
*inst,
 
bottom,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
} else {
} else {
// Subsequent phis using `arg` along the same
// Subsequent phis using `arg` along the same
@@ -972,11 +1085,39 @@ fn materialize_clones(
@@ -972,11 +1085,39 @@ fn materialize_clones(
third,
third,
} => {
} => {
if value.contains_key(&second) {
if value.contains_key(&second) {
induce_clone(second, *inst, &value);
induce_clone(
 
editor,
 
second,
 
*inst,
 
&value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
if value.contains_key(&third) {
if value.contains_key(&third) {
induce_clone(third, *inst, &value);
induce_clone(
 
editor,
 
third,
 
*inst,
 
&value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
}
}
@@ -986,7 +1127,21 @@ fn materialize_clones(
@@ -986,7 +1127,21 @@ fn materialize_clones(
reduct: _,
reduct: _,
} => {
} => {
if value.contains_key(&init) {
if value.contains_key(&init) {
induce_clone(init, *inst, &value);
induce_clone(
 
editor,
 
init,
 
*inst,
 
&value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
}
}
@@ -995,7 +1150,21 @@ fn materialize_clones(
@@ -995,7 +1150,21 @@ fn materialize_clones(
indices: _,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
} if !objects[&func_id].objects(*inst).is_empty() => {
if value.contains_key(&collect) {
if value.contains_key(&collect) {
induce_clone(collect, *inst, &value);
induce_clone(
 
editor,
 
collect,
 
*inst,
 
&value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
}
}
@@ -1005,7 +1174,21 @@ fn materialize_clones(
@@ -1005,7 +1174,21 @@ fn materialize_clones(
indices: _,
indices: _,
} => {
} => {
if value.contains_key(&collect) {
if value.contains_key(&collect) {
induce_clone(collect, *inst, &value);
induce_clone(
 
editor,
 
collect,
 
*inst,
 
&value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
}
}
@@ -1026,7 +1209,21 @@ fn materialize_clones(
@@ -1026,7 +1209,21 @@ fn materialize_clones(
.unwrap_or(false)
.unwrap_or(false)
&& value.contains_key(arg)
&& value.contains_key(arg)
{
{
induce_clone(*arg, *inst, value);
induce_clone(
 
editor,
 
*arg,
 
*inst,
 
value,
 
super_value,
 
lattice,
 
rev_po,
 
typing,
 
control_subgraph,
 
dom,
 
loops,
 
bb_to_prefix_sum,
 
bbs,
 
);
return true;
return true;
}
}
}
}
Loading