Skip to content
Snippets Groups Projects
Commit 9e5b344f authored by rarbore2's avatar rarbore2
Browse files

Handle loop vs. simple induced clones better

parent 157e4645
No related branches found
No related tags found
1 merge request!97Handle loop vs. simple induced clones better
......@@ -23,6 +23,7 @@ pub struct Subgraph {
original_num_nodes: u32,
}
#[derive(Debug, Clone)]
pub struct SubgraphIterator<'a> {
nodes: &'a Vec<NodeID>,
edges: &'a [u32],
......
......@@ -321,10 +321,11 @@ fn basic_blocks(
// Look between the LCA and the schedule early location to place the
// node.
let schedule_early = schedule_early[id.idx()].unwrap();
let schedule_late = lca.unwrap_or(schedule_early);
let mut chain = dom
// If the node has no users, then it doesn't really matter where we
// place it - just place it at the early placement.
.chain(lca.unwrap_or(schedule_early), schedule_early);
.chain(schedule_late, schedule_early);
if let Some(mut location) = chain.next() {
/*
......@@ -539,10 +540,108 @@ fn materialize_clones(
objects: &CollectionObjects,
bbs: &BasicBlocks,
) -> bool {
// First, run dataflow analysis to figure out which access to collections
// induce clones. This dataflow analysis depends on basic block assignments
// and is more analogous to standard dataflow analysis in CFG + SSA IRs.
// This is the only place this form is used, so just hardcode it here.
let rev_po = control_subgraph.rev_po(NodeID::new(0));
let mut total_num_pts = 0;
let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
if idx == bb.idx() {
bb_to_prefix_sum[idx] = total_num_pts;
total_num_pts += insts.len() + 1;
}
}
// Calculate two lattices - one that includes back edges, and one that
// doesn't. We want to handle simple clones before loop induced clones, so
// we first materialize clones based on the no-back-edges lattice, and then
// based on the full lattice.
let mut no_back_edge_lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> =
vec![BTreeMap::new(); total_num_pts];
used_collections_dataflow(
editor,
&mut no_back_edge_lattice,
&rev_po,
&bb_to_prefix_sum,
control_subgraph,
objects,
bbs,
);
let mut super_value = BTreeMap::new();
if find_clones(
editor,
&super_value,
&no_back_edge_lattice,
&rev_po,
&typing,
control_subgraph,
dom,
loops,
objects,
&bb_to_prefix_sum,
bbs,
) {
return true;
}
// After inducing simple clones, calculate the full lattice and materialize
// any loop induced clones.
let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
loop {
let changed = used_collections_dataflow(
editor,
&mut lattice,
&rev_po,
&bb_to_prefix_sum,
control_subgraph,
objects,
bbs,
);
if !changed {
break;
}
}
for value in lattice.iter() {
meet(&mut super_value, value);
}
find_clones(
editor,
&super_value,
&lattice,
&rev_po,
&typing,
control_subgraph,
dom,
loops,
objects,
&bb_to_prefix_sum,
bbs,
)
}
/*
 * Meet operator over lattice values: merges `right` into `left` in place. For
 * every used node recorded in `right`, `left` gains an entry for that node (an
 * empty user set is created if it had none) and all of the users recorded in
 * `right` are added to it. `right` is only read.
 */
fn meet(left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, right: &BTreeMap<NodeID, BTreeSet<NodeID>>) {
    for (&object, object_users) in right {
        // Union the users of `object` from `right` into the set held by `left`.
        let merged_users = left.entry(object).or_default();
        merged_users.extend(object_users.iter().copied());
    }
}
/*
* Helper function to run a single iteration of the used collections dataflow
* analysis. Returns whether the lattice was changed. The lattice maps each
* program point to a set of used values and their possible users. Top is that
* no nodes are used yet.
*/
fn used_collections_dataflow(
editor: &FunctionEditor,
lattice: &mut Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
bb_to_prefix_sum: &Vec<usize>,
control_subgraph: &Subgraph,
objects: &CollectionObjects,
bbs: &BasicBlocks,
) -> bool {
// Run dataflow analysis to figure out which accesses to collections induce
// clones. This dataflow analysis depends on basic block assignments and is
// more analogous to standard dataflow analysis in CFG + SSA IRs. This is
// the only place this form is used, so just hardcode it here.
//
// This forward dataflow analysis tracks which collections are used at each
// program point, and by what user nodes. Collections are referred to using
......@@ -575,363 +674,364 @@ fn materialize_clones(
// "sub-view" of the same collection. This does not include reads that "end"
// (most reads, some calls, the `data` input of a write). This analysis does
// not consider parallel mutations in fork-joins.
let rev_po = control_subgraph.rev_po(NodeID::new(0));
let mut total_num_pts = 0;
let mut bb_to_prefix_sum = vec![0; bbs.0.len()];
for ((idx, bb), insts) in zip(bbs.0.iter().enumerate(), bbs.1.iter()) {
if idx == bb.idx() {
bb_to_prefix_sum[idx] = total_num_pts;
total_num_pts += insts.len() + 1;
}
}
// Lattice maps each program point to a set of used values and their
// possible users. Top is that no nodes are used yet.
let nodes = &editor.func().nodes;
let func_id = editor.func_id();
let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>,
right: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
for (used, users) in right.into_iter() {
left.entry(*used).or_default().extend(users.into_iter());
}
};
let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts];
loop {
let mut changed = false;
let mut changed = false;
for bb in rev_po.iter() {
// The lattice value of the first point is the meet of the
// predecessor terminating lattice values.
let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
let mut new_top_value = BTreeMap::new();
// Clearing `top_value` is not necessary since used nodes are never
// removed from lattice values, only added.
for pred in control_subgraph.preds(*bb) {
let last_pt = bbs.1[pred.idx()].len();
meet(
&mut new_top_value,
&lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
);
}
changed |= *old_top_value != new_top_value;
lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
for bb in rev_po.iter() {
// The lattice value of the first point is the meet of the
// predecessor terminating lattice values.
let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]];
let mut new_top_value = BTreeMap::new();
// Clearing `top_value` is not necessary since used nodes are never
// removed from lattice values, only added.
for pred in control_subgraph.preds(*bb) {
let last_pt = bbs.1[pred.idx()].len();
meet(
&mut new_top_value,
&lattice[bb_to_prefix_sum[pred.idx()] + last_pt],
);
}
changed |= *old_top_value != new_top_value;
lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value;
// The lattice value of following points are determined by their
// immediate preceding instructions.
let insts = &bbs.1[bb.idx()];
for (prev_pt, inst) in insts.iter().enumerate() {
let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
let mut new_value = prev_value.clone();
match nodes[inst.idx()] {
Node::Phi {
control: _,
ref data,
} if !objects[&func_id].objects(*inst).is_empty() => {
for elem in data {
new_value.entry(*elem).or_default().insert(*inst);
}
new_value.remove(inst);
}
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
}
| Node::Reduce {
control: _,
init: second,
reduct: third,
} => {
if !objects[&func_id].objects(*inst).is_empty() {
new_value.entry(second).or_default().insert(*inst);
new_value.entry(third).or_default().insert(*inst);
new_value.remove(inst);
}
}
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Write {
collect,
data: _,
indices: _,
} => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
// The lattice value of following points are determined by their
// immediate preceding instructions.
let insts = &bbs.1[bb.idx()];
for (prev_pt, inst) in insts.iter().enumerate() {
let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1];
let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt];
let mut new_value = prev_value.clone();
match nodes[inst.idx()] {
Node::Phi {
control: _,
ref data,
} if !objects[&func_id].objects(*inst).is_empty() => {
for elem in data {
new_value.entry(*elem).or_default().insert(*inst);
}
Node::Call {
control: _,
function: callee,
dynamic_constants: _,
ref args,
} => {
let callee_objects = &objects[&callee];
for (param_idx, arg) in args.into_iter().enumerate() {
if callee_objects
.param_to_object(param_idx)
.map(|object| {
callee_objects.is_mutated(object)
|| callee_objects.returned_objects().contains(&object)
})
.unwrap_or(false)
{
new_value.entry(*arg).or_default().insert(*inst);
}
}
new_value.remove(inst);
}
Node::Ternary {
op: TernaryOperator::Select,
first: _,
second,
third,
}
| Node::Reduce {
control: _,
init: second,
reduct: third,
} => {
if !objects[&func_id].objects(*inst).is_empty() {
new_value.entry(second).or_default().insert(*inst);
new_value.entry(third).or_default().insert(*inst);
new_value.remove(inst);
}
_ => {}
}
changed |= *old_value != new_value;
lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
}
// Handle reduces in this block specially at the very end.
let last_pt = insts.len();
let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
let mut new_bottom_value = old_bottom_value.clone();
for inst in insts.iter() {
if let Node::Reduce {
Node::Read {
collect,
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Write {
collect,
data: _,
indices: _,
} => {
new_value.entry(collect).or_default().insert(*inst);
new_value.remove(inst);
}
Node::Call {
control: _,
init: _,
reduct,
} = nodes[inst.idx()]
{
assert!(
new_bottom_value.contains_key(&reduct),
"PANIC: Can't handle clones inside a reduction cycle currently."
);
new_bottom_value.remove(inst);
function: callee,
dynamic_constants: _,
ref args,
} => {
let callee_objects = &objects[&callee];
for (param_idx, arg) in args.into_iter().enumerate() {
if callee_objects
.param_to_object(param_idx)
.map(|object| {
callee_objects.is_mutated(object)
|| callee_objects.returned_objects().contains(&object)
})
.unwrap_or(false)
{
new_value.entry(*arg).or_default().insert(*inst);
}
}
new_value.remove(inst);
}
_ => {}
}
changed |= *old_bottom_value != new_bottom_value;
lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
changed |= *old_value != new_value;
lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value;
}
if !changed {
break;
// Handle reduces in this block specially at the very end.
let last_pt = insts.len();
let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt];
let mut new_bottom_value = old_bottom_value.clone();
for inst in insts.iter() {
if let Node::Reduce {
control: _,
init: _,
reduct,
} = nodes[inst.idx()]
{
assert!(
new_bottom_value.contains_key(&reduct),
"PANIC: Can't handle clones inside a reduction cycle currently."
);
new_bottom_value.remove(inst);
}
}
}
let mut super_value = BTreeMap::new();
for value in lattice.iter() {
meet(&mut super_value, value);
changed |= *old_bottom_value != new_bottom_value;
lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value;
}
// Helper to induce a clone when an implicit clone is identified.
let nodes = nodes.clone();
let mut induce_clone = |object: NodeID,
user: NodeID,
value: &BTreeMap<NodeID, BTreeSet<NodeID>>| {
// If `user` already used `object` and tries to use it again, then the
// clone is a "loop induced" clone. Otherwise, it's a simple clone.
if !value[&object].contains(&user) {
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
changed
}
// Create the clone into the new constant collection.
let clone_node = edit.add_node(Node::Write {
collect: cons_node,
data: object,
indices: vec![].into_boxed_slice(),
});
/*
* Helper function to induce a clone once an object with multiple users has been
* found.
*/
fn induce_clone(
editor: &mut FunctionEditor,
object: NodeID,
user: NodeID,
value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
bb_to_prefix_sum: &Vec<usize>,
bbs: &BasicBlocks,
) {
// If `user` already used `object` and tries to use it again, then the
// clone is a "loop induced" clone. Otherwise, it's a simple clone.
if !value[&object].contains(&user) {
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Make user use the cloned object.
edit.replace_all_uses_where(object, clone_node, |id| *id == user)
});
assert!(success);
} else {
// Figure out where to place that phi. This is the deepest
// loop header where `user` is responsible for making `object` used
// at the top of the block, and the block dominates the block
// containing `user`. If `user` is a phi, then the region it's
// attached to is excluded from eligibility.
let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
lattice[bb_to_prefix_sum[bb.idx()]]
.get(&object)
.unwrap_or(&BTreeSet::new())
.contains(&user)
&& dom.does_dom(*bb, bbs.0[user.idx()])
&& loops.contains(*bb)
&& loops.is_in_loop(*bb, bbs.0[user.idx()])
&& (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
// Create the clone into the new constant collection.
let clone_node = edit.add_node(Node::Write {
collect: cons_node,
data: object,
indices: vec![].into_boxed_slice(),
});
let top_block = eligible_blocks
.max_by_key(|bb| loops.nesting(*bb).unwrap())
.unwrap();
assert!(editor.func().nodes[top_block.idx()].is_region());
// Figure out the users of `object` that we need to phi back
// upwards. Assign each user a number indicating how far down the
// user chain it is, higher is farther down. This is used for
// picking the most downstream user later.
let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
let mut workset: BTreeSet<NodeID> = BTreeSet::new();
workset.insert(object);
let mut chain_ordering = 1;
while let Some(pop) = workset.pop_first() {
let iterated_users: BTreeSet<_> = super_value
.get(&pop)
.map(|users| users.into_iter())
.into_iter()
.flatten()
.map(|id| *id)
.filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
.collect();
workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
for user in iterated_users {
*users.entry(user).or_default() = chain_ordering;
chain_ordering += 1;
}
// Make user use the cloned object.
edit.replace_all_uses_where(object, clone_node, |id| *id == user)
});
assert!(success);
} else {
// Figure out where to place that phi. This is the deepest
// loop header where `user` is responsible for making `object` used
// at the top of the block, and the block dominates the block
// containing `user`. If `user` is a phi, then the region it's
// attached to is excluded from eligibility.
let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| {
lattice[bb_to_prefix_sum[bb.idx()]]
.get(&object)
.unwrap_or(&BTreeSet::new())
.contains(&user)
&& dom.does_dom(*bb, bbs.0[user.idx()])
&& loops.contains(*bb)
&& loops.is_in_loop(*bb, bbs.0[user.idx()])
&& (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()])
});
let top_block = eligible_blocks
.max_by_key(|bb| loops.nesting(*bb).unwrap())
.unwrap();
assert!(editor.func().nodes[top_block.idx()].is_region());
// Figure out the users of `object` that we need to phi back
// upwards. Assign each user a number indicating how far down the
// user chain it is, higher is farther down. This is used for
// picking the most downstream user later.
let mut users: BTreeMap<NodeID, usize> = BTreeMap::new();
let mut workset: BTreeSet<NodeID> = BTreeSet::new();
workset.insert(object);
let mut chain_ordering = 1;
assert!(!super_value.is_empty());
while let Some(pop) = workset.pop_first() {
let iterated_users: BTreeSet<_> = super_value
.get(&pop)
.map(|users| users.into_iter())
.into_iter()
.flatten()
.map(|id| *id)
.filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]))
.collect();
workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id)));
for user in iterated_users {
*users.entry(user).or_default() = chain_ordering;
chain_ordering += 1;
}
}
// The fringe users may not dominate any predecessors of the loop
// header. The following is some Juno code that exposes this:
//
// fn problematic(a : size) -> i32 {
// for i = 0 to a {
// let arr : i32[1];
// for j = 0 to a {
// arr[0] = 1;
// }
// }
// return 0;
// }
//
// Note that `arr` induces a clone each iteration, since its value
// needs to be reset to all zeros. However, it should also be noted
// that the most fringe user of `arr`, the write inside the inner
// loop, does not dominate the bottom of the outer loop. Thus, we
// need to insert a phi in the bottom block of the outer loop to
// retrieve either the write, or `arr` before the inner loop. The
// general version of this problem requires the following solution.
// Our goal is to figure out which downstream user represents
// `object` at each block in the loop. We first assign each block
// containing a user the most downstream user it contains. Then, we
// create a dummy phi for every region (including the header) in the
// loop, which is the downstream user for that block. Then, every
// other block is assigned the downstream user of its single
// predecessor. This basically amounts to recreating SSA for
// `object` inside the loop.
let mut user_per_loop_bb = BTreeMap::new();
let mut added_phis = BTreeMap::new();
let mut top_phi = NodeID::new(0);
// Assign existing users.
for (user, ordering) in users.iter() {
let bb = bbs.0[user.idx()];
if let Some(old_user) = user_per_loop_bb.get(&bb)
&& users[old_user] > *ordering
{
} else {
user_per_loop_bb.insert(bb, *user);
}
// The fringe users may not dominate any predecessors of the loop
// header. The following is some Juno code that exposes this:
//
// fn problematic(a : size) -> i32 {
// for i = 0 to a {
// let arr : i32[1];
// for j = 0 to a {
// arr[0] = 1;
// }
// }
// return 0;
// }
//
// Note that `arr` induces a clone each iteration, since its value
// needs to be reset to all zeros. However, it should also be noted
// that the most fringe user of `arr`, the write inside the inner
// loop, does not dominate the bottom of the outer loop. Thus, we
// need to insert a phi in the bottom block of the outer loop to
// retrieve either the write, or `arr` before the inner loop. The
// general version of this problem requires the following solution.
// Our goal is to figure out which downstream user represents
// `object` at each block in the loop. We first assign each block
// containing a user the most downstream user it contains. Then, we
// create a dummy phi for every region (including the header) in the
// loop, which is the downstream user for that block. Then, every
// other block is assigned the downstream user of its single
// predecessor. This basically amounts to recreating SSA for
// `object` inside the loop.
let mut user_per_loop_bb = BTreeMap::new();
let mut added_phis = BTreeMap::new();
let mut top_phi = NodeID::new(0);
// Assign existing users.
for (user, ordering) in users.iter() {
let bb = bbs.0[user.idx()];
if let Some(old_user) = user_per_loop_bb.get(&bb)
&& users[old_user] > *ordering
{
} else {
user_per_loop_bb.insert(bb, *user);
}
// Assign dummy phis.
for bb in loops.nodes_in_loop(top_block) {
if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
&& editor.func().nodes[bb.idx()].is_region()
{
let success = editor.edit(|mut edit| {
let phi_node = edit.add_node(Node::Phi {
control: bb,
data: empty().collect(),
});
if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
user_per_loop_bb.insert(bb, phi_node);
}
if bb == top_block {
top_phi = phi_node;
}
added_phis.insert(phi_node, bb);
Ok(edit)
}
// Assign dummy phis.
for bb in loops.nodes_in_loop(top_block) {
if (!user_per_loop_bb.contains_key(&bb) || bb == top_block)
&& editor.func().nodes[bb.idx()].is_region()
{
let success = editor.edit(|mut edit| {
let phi_node = edit.add_node(Node::Phi {
control: bb,
data: empty().collect(),
});
assert!(success);
}
if bb != top_block || !user_per_loop_bb.contains_key(&bb) {
user_per_loop_bb.insert(bb, phi_node);
}
if bb == top_block {
top_phi = phi_node;
}
added_phis.insert(phi_node, bb);
Ok(edit)
});
assert!(success);
}
// Assign users for the rest of the blocks.
for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
if !user_per_loop_bb.contains_key(&bb) {
assert!(control_subgraph.preds(*bb).count() == 1);
user_per_loop_bb.insert(
*bb,
user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
);
}
}
// Assign users for the rest of the blocks.
for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) {
if !user_per_loop_bb.contains_key(&bb) {
assert!(control_subgraph.preds(*bb).count() == 1);
user_per_loop_bb.insert(
*bb,
user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()],
);
}
}
// Induce the clone.
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Induce the clone.
let success = editor.edit(|mut edit| {
// Create the constant collection object for allocation.
let object_ty = typing[object.idx()];
let object_cons = edit.add_zero_constant(object_ty);
let cons_node = edit.add_node(Node::Constant { id: object_cons });
// Create the phis.
let mut phi_map = BTreeMap::new();
let mut real_phis = BTreeSet::new();
for (dummy, bb) in added_phis {
let real = edit.add_node(Node::Phi {
control: bb,
data: control_subgraph
.preds(bb)
.map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
.collect(),
});
phi_map.insert(dummy, real);
real_phis.insert(real);
}
// Create the clone into the phi.
let real_top_phi = phi_map[&top_phi];
let clone_node = edit.add_node(Node::Write {
collect: real_top_phi,
data: object,
indices: vec![].into_boxed_slice(),
// Create the phis.
let mut phi_map = BTreeMap::new();
let mut real_phis = BTreeSet::new();
for (dummy, bb) in added_phis {
let real = edit.add_node(Node::Phi {
control: bb,
data: control_subgraph
.preds(bb)
.map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node))
.collect(),
});
phi_map.insert(dummy, real);
real_phis.insert(real);
}
// Make users use the cloned object.
edit = edit.replace_all_uses_where(object, clone_node, |id| {
id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
})?;
// Create the clone into the phi.
let real_top_phi = phi_map[&top_phi];
let clone_node = edit.add_node(Node::Write {
collect: real_top_phi,
data: object,
indices: vec![].into_boxed_slice(),
});
// Get rid of the dummy phis.
for (dummy, real) in phi_map {
edit = edit.replace_all_uses(dummy, real)?;
edit = edit.delete_node(dummy)?;
}
// Make users use the cloned object.
edit = edit.replace_all_uses_where(object, clone_node, |id| {
id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()])
})?;
// Make phis use the clone instead of the top phi.
edit =
edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?;
// Get rid of the dummy phis.
for (dummy, real) in phi_map {
edit = edit.replace_all_uses(dummy, real)?;
edit = edit.delete_node(dummy)?;
}
Ok(edit)
});
assert!(success);
// Make phis use the clone instead of the top phi.
edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)
});
assert!(success);
// De-duplicate phis.
gvn(editor, false);
// De-duplicate phis.
gvn(editor, false);
// Get rid of unused phis.
dce(editor);
// Get rid of unused phis.
dce(editor);
// Simplify phis.
phi_elim(editor);
}
};
// Simplify phis.
phi_elim(editor);
}
}
// Now that we've computed the used collections dataflow analysis, use the
// results to materialize a clone whenever a node attempts to use an already
// used node. As soon as any clone is found, return since that clone needs
// to get placed before other clones can be discovered. Traverse blocks in
// postorder so that clones inside loops are discovered before loop-induced
// clones.
/*
* Helper function to analyze lattice values at each program point and find
* multiple dynamic users of a single write. Return as soon as any clone is
* found.
*/
fn find_clones(
editor: &mut FunctionEditor,
super_value: &BTreeMap<NodeID, BTreeSet<NodeID>>,
lattice: &Vec<BTreeMap<NodeID, BTreeSet<NodeID>>>,
rev_po: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
dom: &DomTree,
loops: &LoopTree,
objects: &CollectionObjects,
bb_to_prefix_sum: &Vec<usize>,
bbs: &BasicBlocks,
) -> bool {
let nodes = &editor.func().nodes;
let func_id = editor.func_id();
for bb in rev_po.iter().rev() {
let insts = &bbs.1[bb.idx()];
// Accumulate predecessor bottom used sets for phis. Phis are special in
......@@ -955,7 +1055,21 @@ fn materialize_clones(
bottom.clone()
});
if bottom.contains_key(&arg) {
induce_clone(*arg, *inst, bottom);
induce_clone(
editor,
*arg,
*inst,
bottom,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
} else {
// Subsequent phis using `arg` along the same
......@@ -971,11 +1085,39 @@ fn materialize_clones(
third,
} => {
if value.contains_key(&second) {
induce_clone(second, *inst, &value);
induce_clone(
editor,
second,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
if value.contains_key(&third) {
induce_clone(third, *inst, &value);
induce_clone(
editor,
third,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
......@@ -985,7 +1127,21 @@ fn materialize_clones(
reduct: _,
} => {
if value.contains_key(&init) {
induce_clone(init, *inst, &value);
induce_clone(
editor,
init,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
......@@ -994,7 +1150,21 @@ fn materialize_clones(
indices: _,
} if !objects[&func_id].objects(*inst).is_empty() => {
if value.contains_key(&collect) {
induce_clone(collect, *inst, &value);
induce_clone(
editor,
collect,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
......@@ -1004,7 +1174,21 @@ fn materialize_clones(
indices: _,
} => {
if value.contains_key(&collect) {
induce_clone(collect, *inst, &value);
induce_clone(
editor,
collect,
*inst,
&value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
......@@ -1025,7 +1209,21 @@ fn materialize_clones(
.unwrap_or(false)
&& value.contains_key(arg)
{
induce_clone(*arg, *inst, value);
induce_clone(
editor,
*arg,
*inst,
value,
super_value,
lattice,
rev_po,
typing,
control_subgraph,
dom,
loops,
bb_to_prefix_sum,
bbs,
);
return true;
}
}
......
......@@ -64,16 +64,32 @@ fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 {
fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 {
let x = 0;
for i = 0 to 3 {
let arr : i32[1];
let arr1 : i32[1];
let arr2 : i32[1];
if a == b {
arr[0] = 6;
arr1[0] = 6;
} else {
arr[0] = 9;
arr2[0] = 9;
}
arr1[0] = 2;
for j = 0 to 4 {
arr[0] += 1;
arr2[0] += 1;
}
x += arr2[0];
}
return x;
}
#[entry]
fn tricky3_loop_implicit_clone(a : usize, b : usize) -> usize {
let x = 0;
for i = 0 to b {
let arr1 : usize[10];
let arr2 : usize[10];
arr1[1] = 1;
for kk = 0 to 10 {
arr2[kk] += arr1[kk];
}
x += arr[0];
}
return x;
}
......
......@@ -27,6 +27,10 @@ fn main() {
println!("{}", output);
assert_eq!(output, 39);
let output = tricky3_loop_implicit_clone(5, 7).await;
println!("{}", output);
assert_eq!(output, 0);
let output = no_implicit_clone(4).await;
println!("{}", output);
assert_eq!(output, 13);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment