From 5fa92052468fbe317c0c8b54921972e7a3ff7160 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 6 Jan 2025 11:21:05 -0800 Subject: [PATCH 01/14] Fix handling of clones with phis --- hercules_opt/src/legalize_reference_semantics.rs | 15 +++++++++++++-- juno_samples/matmul/src/matmul.jn | 6 ++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 254524f9..cd7a3d5a 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -720,6 +720,10 @@ fn materialize_clones( let nodes = nodes.clone(); for bb in rev_po.iter() { let insts = &bbs.1[bb.idx()]; + // Accumulate predecessor bottom used sets for phis. Phis are special in + // that they need to be path sensitive, but multiple phis in a single + // block may use a single collection, and that needs to induce a clone. + let mut phi_acc_bottoms = HashMap::new(); for (prev_pt, inst) in insts.iter().enumerate() { let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; match nodes[inst.idx()] { @@ -731,11 +735,18 @@ fn materialize_clones( // predecessor's bottom lattice value (phis need to be path- // sensitive). for (pred, arg) in zip(control_subgraph.preds(*bb), data) { - let last_pt = bbs.1[pred.idx()].len(); - let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; + let bottom = phi_acc_bottoms.entry(pred).or_insert_with(|| { + let last_pt = bbs.1[pred.idx()].len(); + let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; + bottom.clone() + }); if bottom.contains(arg) { induce_clone(editor, typing, *arg, *inst); any_induced = true; + } else { + // Subsequent phis using `arg` along the same + // predecessor induce a clone. + bottom.insert(*arg); } } } diff --git a/juno_samples/matmul/src/matmul.jn b/juno_samples/matmul/src/matmul.jn index 775bb382..16a262b9 100644 --- a/juno_samples/matmul/src/matmul.jn +++ b/juno_samples/matmul/src/matmul.jn @@ -20,10 +20,8 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for bi = 0 to n / 64 { for bk = 0 to l / 64 { - // TODO: make these all the same size, clone analysis should undo GVN's - // combining of these three arrays. - let atile : i32[66, 64]; - let btile : i32[65, 64]; + let atile : i32[64, 64]; + let btile : i32[64, 64]; let ctile : i32[64, 64]; for tile_idx = 0 to m / 64 { -- GitLab From a4b47f937a6e4d888251bb507a7b94ec0a3629c4 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 6 Jan 2025 15:40:34 -0800 Subject: [PATCH 02/14] Do enough analysis to identify loop-induced clones --- .../src/legalize_reference_semantics.rs | 200 ++++++++++-------- 1 file changed, 107 insertions(+), 93 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index cd7a3d5a..b3a106d5 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -2,9 +2,8 @@ extern crate bitvec; extern crate hercules_cg; extern crate hercules_ir; -use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque}; use std::iter::{empty, once, zip, FromIterator}; -use std::mem::take; use self::bitvec::prelude::*; @@ -70,7 +69,7 @@ pub fn legalize_reference_semantics( ) { Ok(bbs) => bbs, Err((obj, reader)) => { - induce_clone(editor, typing, obj, reader); + todo!(); return None; } }; @@ -543,7 +542,8 @@ fn materialize_clones( // This is the only place this form is used, so just hardcode it here. // // This forward dataflow analysis tracks which collections are used at each - // program point. Collections are referred to using node IDs. Specifically: + // program point, and by what user nodes. Collections are referred to using + // node IDs. Specifically: // // - Phi - a phi node adds its inputs to the used set and removes itself // from the used set. If a phi uses an ID that is used along the edge of @@ -567,11 +567,11 @@ fn materialize_clones( // to the used set and removes itself from the used set. If any mutated // input is already used, a clone is induced. // - // Reads of sub-collections (select, read, and call nodes) use a collection - // because they may have downstream writes that depend on the new "view" of - // the same collection. This does not include reads that "end" (the `data` - // input of a write). This analysis does not consider parallel mutations in - // fork-joins, which are handled separately later in this function. + // Reads of sub-collections (select, some read, and some call nodes) use a + // collection because they may have downstream writes that depend on the new + // "sub-view" of the same collection. This does not include reads that "end" + // (most reads, some calls, the `data` input of a write). This analysis does + // not consider parallel mutations in fork-joins. let rev_po = control_subgraph.rev_po(NodeID::new(0)); let mut total_num_pts = 0; let mut bb_to_prefix_sum = vec![0; bbs.0.len()]; @@ -581,82 +581,85 @@ fn materialize_clones( total_num_pts += insts.len() + 1; } } - // Lattice maps each program point to a set of used values. Top is that no - // nodes are used yet. + // Lattice maps each program point to a set of used values and their + // possible users. Top is that no nodes are used yet. let nodes = &editor.func().nodes; let func_id = editor.func_id(); - let mut lattice: Vec<BTreeSet<NodeID>> = vec![BTreeSet::new(); total_num_pts]; + let meet = |left: &mut BTreeMap<NodeID, BTreeSet<NodeID>>, + right: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + for (used, users) in right.into_iter() { + left.entry(*used).or_default().extend(users.into_iter()); + } + }; + let mut lattice: Vec<BTreeMap<NodeID, BTreeSet<NodeID>>> = vec![BTreeMap::new(); total_num_pts]; loop { let mut changed = false; for bb in rev_po.iter() { // The lattice value of the first point is the meet of the // predecessor terminating lattice values. - let mut top_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()]]); + let old_top_value = &lattice[bb_to_prefix_sum[bb.idx()]]; + let mut new_top_value = BTreeMap::new(); // Clearing `top_value` is not necessary since used nodes are never // removed from lattice values, only added. for pred in control_subgraph.preds(*bb) { - // It should not be possible in Hercules IR for a basic block to - // be one of its own predecessors. - assert_ne!(*bb, pred); let last_pt = bbs.1[pred.idx()].len(); - for elem in lattice[bb_to_prefix_sum[pred.idx()] + last_pt].iter() { - changed |= top_value.insert(*elem); - } + meet( + &mut new_top_value, + &lattice[bb_to_prefix_sum[pred.idx()] + last_pt], + ); } - lattice[bb_to_prefix_sum[bb.idx()]] = top_value; + changed |= *old_top_value != new_top_value; + lattice[bb_to_prefix_sum[bb.idx()]] = new_top_value; // The lattice value of following points are determined by their // immediate preceding instructions. let insts = &bbs.1[bb.idx()]; for (prev_pt, inst) in insts.iter().enumerate() { - let mut new_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]); + let old_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1]; let prev_value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; + let mut new_value = prev_value.clone(); match nodes[inst.idx()] { Node::Phi { control: _, ref data, } if !objects[&func_id].objects(*inst).is_empty() => { for elem in data { - changed |= new_value.insert(*elem); + new_value.entry(*elem).or_default().insert(*inst); } - changed |= new_value.remove(inst); + new_value.remove(inst); } Node::Ternary { op: TernaryOperator::Select, first: _, second, third, + } + | Node::Reduce { + control: _, + init: second, + reduct: third, } => { if !objects[&func_id].objects(*inst).is_empty() { - changed |= new_value.insert(second); - changed |= new_value.insert(third); - changed |= new_value.remove(inst); + new_value.entry(second).or_default().insert(*inst); + new_value.entry(third).or_default().insert(*inst); + new_value.remove(inst); } } - Node::Reduce { - control: _, - init, - reduct, - } if !objects[&func_id].objects(*inst).is_empty() => { - changed |= new_value.insert(init); - changed |= new_value.insert(reduct); - changed |= new_value.remove(inst); - } Node::Read { collect, indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { - changed |= new_value.insert(collect); - changed |= new_value.remove(inst); + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); } Node::Write { collect, data: _, indices: _, } => { - changed |= new_value.insert(collect); - changed |= new_value.remove(inst); + new_value.entry(collect).or_default().insert(*inst); + new_value.remove(inst); } Node::Call { control: _, @@ -674,23 +677,21 @@ fn materialize_clones( }) .unwrap_or(false) { - changed |= new_value.insert(*arg); + new_value.entry(*arg).or_default().insert(*inst); } } - changed |= new_value.remove(inst); - } - _ => { - for elem in prev_value { - changed |= new_value.insert(*elem); - } + new_value.remove(inst); } + _ => {} } + changed |= *old_value != new_value; lattice[bb_to_prefix_sum[bb.idx()] + prev_pt + 1] = new_value; } // Handle reduces in this block specially at the very end. let last_pt = insts.len(); - let mut bottom_value = take(&mut lattice[bb_to_prefix_sum[bb.idx()] + last_pt]); + let old_bottom_value = &lattice[bb_to_prefix_sum[bb.idx()] + last_pt]; + let mut new_bottom_value = old_bottom_value.clone(); for inst in insts.iter() { if let Node::Reduce { control: _, @@ -699,13 +700,14 @@ fn materialize_clones( } = nodes[inst.idx()] { assert!( - bottom_value.contains(&reduct), + new_bottom_value.contains_key(&reduct), "PANIC: Can't handle clones inside a reduction cycle currently." ); - changed |= bottom_value.remove(inst); + new_bottom_value.remove(inst); } } - lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = bottom_value; + changed |= *old_bottom_value != new_bottom_value; + lattice[bb_to_prefix_sum[bb.idx()] + last_pt] = new_bottom_value; } if !changed { @@ -715,15 +717,17 @@ fn materialize_clones( // Now that we've computed the used collections dataflow analysis, use the // results to materialize a clone whenever a node attempts to use an already - // used node. - let mut any_induced = false; + // used node. As soon as any clone is found, return since that clone needs + // to get placed before other clones can be discovered. Traverse blocks in + // postorder so that clones inside loops are discovered before loop-induced + // clones. let nodes = nodes.clone(); - for bb in rev_po.iter() { + for bb in rev_po.iter().rev() { let insts = &bbs.1[bb.idx()]; // Accumulate predecessor bottom used sets for phis. Phis are special in // that they need to be path sensitive, but multiple phis in a single // block may use a single collection, and that needs to induce a clone. - let mut phi_acc_bottoms = HashMap::new(); + let mut phi_acc_bottoms = BTreeMap::new(); for (prev_pt, inst) in insts.iter().enumerate() { let value = &lattice[bb_to_prefix_sum[bb.idx()] + prev_pt]; match nodes[inst.idx()] { @@ -740,13 +744,13 @@ fn materialize_clones( let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; bottom.clone() }); - if bottom.contains(arg) { - induce_clone(editor, typing, *arg, *inst); - any_induced = true; + if let Some(other_users) = bottom.get(&arg) { + induce_clone(editor, typing, *arg, *inst, other_users); + return true; } else { // Subsequent phis using `arg` along the same // predecessor induce a clone. - bottom.insert(*arg); + bottom.insert(*arg, once(*inst).collect()); } } } @@ -756,13 +760,13 @@ fn materialize_clones( second, third, } => { - if value.contains(&second) { - induce_clone(editor, typing, second, *inst); - any_induced = true; + if let Some(other_users) = value.get(&second) { + induce_clone(editor, typing, second, *inst, other_users); + return true; } - if value.contains(&third) { - induce_clone(editor, typing, third, *inst); - any_induced = true; + if let Some(other_users) = value.get(&third) { + induce_clone(editor, typing, third, *inst, other_users); + return true; } } Node::Reduce { @@ -770,18 +774,18 @@ fn materialize_clones( init, reduct: _, } => { - if value.contains(&init) { - induce_clone(editor, typing, init, *inst); - any_induced = true; + if let Some(other_users) = value.get(&init) { + induce_clone(editor, typing, init, *inst, other_users); + return true; } } Node::Read { collect, indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { - if value.contains(&collect) { - induce_clone(editor, typing, collect, *inst); - any_induced = true; + if let Some(other_users) = value.get(&collect) { + induce_clone(editor, typing, collect, *inst, other_users); + return true; } } Node::Write { @@ -789,9 +793,9 @@ fn materialize_clones( data: _, indices: _, } => { - if value.contains(&collect) { - induce_clone(editor, typing, collect, *inst); - any_induced = true; + if let Some(other_users) = value.get(&collect) { + induce_clone(editor, typing, collect, *inst, other_users); + return true; } } Node::Call { @@ -809,10 +813,10 @@ fn materialize_clones( || callee_objects.returned_objects().contains(&object) }) .unwrap_or(false) - && value.contains(arg) + && let Some(other_users) = value.get(arg) { - induce_clone(editor, typing, *arg, *inst); - any_induced = true; + induce_clone(editor, typing, *arg, *inst, other_users); + return true; } } } @@ -820,27 +824,37 @@ fn materialize_clones( } } } - any_induced + false } /* * Utility to insert a clone before a use of a collection. */ -fn induce_clone(editor: &mut FunctionEditor, typing: &Vec<TypeID>, object: NodeID, user: NodeID) { - editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); - - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), +fn induce_clone( + editor: &mut FunctionEditor, + typing: &Vec<TypeID>, + object: NodeID, + user: NodeID, + other_users: &BTreeSet<NodeID>, +) { + if other_users.contains(&user) { + editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the clone into the new constant collection. + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) }); - - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); + } else { + todo!() + } } -- GitLab From b01a31f3b3134cd1c40a6fa272441b6b7210c2cb Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 6 Jan 2025 22:27:43 -0800 Subject: [PATCH 03/14] Handle cloning writes in loops --- hercules_ir/src/dom.rs | 20 +++ .../src/legalize_reference_semantics.rs | 170 +++++++++++++----- juno_frontend/src/lib.rs | 2 + 3 files changed, 145 insertions(+), 47 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index f9046b43..2c0f085d 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -77,6 +77,26 @@ impl DomTree { .1 } + /* + * Find the node with the shallowest level in the dom tree amongst the nodes + * given. + */ + pub fn shallowest_amongst<I>(&self, x: I) -> NodeID + where + I: Iterator<Item = NodeID>, + { + x.map(|x| { + if x == self.root { + (0, x) + } else { + (self.idom[&x].0, x) + } + }) + .min_by(|x, y| x.0.cmp(&y.0)) + .unwrap() + .1 + } + /* * Find the least common ancestor in the tree of two nodes. This is an * ancestor of the two nodes that is as far down the tree as possible. diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index b3a106d5..ae5b5c3f 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -73,7 +73,7 @@ pub fn legalize_reference_semantics( return None; } }; - if materialize_clones(editor, typing, control_subgraph, objects, &bbs) { + if materialize_clones(editor, typing, control_subgraph, dom, objects, &bbs) { None } else { Some(bbs) @@ -533,6 +533,7 @@ fn materialize_clones( editor: &mut FunctionEditor, typing: &Vec<TypeID>, control_subgraph: &Subgraph, + dom: &DomTree, objects: &CollectionObjects, bbs: &BasicBlocks, ) -> bool { @@ -722,6 +723,113 @@ fn materialize_clones( // postorder so that clones inside loops are discovered before loop-induced // clones. let nodes = nodes.clone(); + let mut induce_clone = |object: NodeID, + user: NodeID, + value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + if !value[&object].contains(&user) { + editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the clone into the new constant collection. + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + } else { + // Figure out the downstream users of `user` that we need to phi + // back upwards. + let mut user_iterated_users: BTreeSet<NodeID> = BTreeSet::new(); + let mut workset = BTreeSet::new(); + workset.insert(user); + while let Some(pop) = workset.pop_first() { + user_iterated_users.insert(pop); + workset.extend( + value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten(), + ); + } + + // Figure out which downstream users aren't themselves used - + // these are the nodes that get cycled through the phi. + let fringe_users: BTreeSet<NodeID> = user_iterated_users + .into_iter() + .filter(|user| !value.contains_key(&user)) + .collect(); + + // Figure out where to place that phi. This is the shallowest + // node in the domtree where `user` is responsible for making + // `object` used at the top of the block, and the block + // dominates the block containing `user`. + let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { + lattice[bb_to_prefix_sum[bb.idx()]] + .get(&object) + .unwrap_or(&BTreeSet::new()) + .contains(&user) + && dom.does_dom(*bb, bbs.0[user.idx()]) + }); + let top_block = dom.shallowest_amongst(eligible_blocks); + assert!(editor.func().nodes[top_block.idx()].is_region()); + + // Figure out which fringe users, if any, dominate each + // predecessor of the top block. If none dominate a predecessor, + // then the input for that predecessor is the new collection + // constant. + let mut fringe_user_per_pred = vec![None; control_subgraph.preds(top_block).count()]; + for fringe_user in fringe_users { + for (idx, pred) in control_subgraph.preds(top_block).enumerate() { + if dom.does_dom(bbs.0[fringe_user.idx()], pred) { + assert!(fringe_user_per_pred[idx].is_none(), "PANIC: Multiple fringe users of a cloning write in a loop dominate the same predecessor of the loop header. This shouldn't be possible!"); + fringe_user_per_pred[idx] = Some(fringe_user); + } + } + } + assert!(fringe_user_per_pred.iter().any(|user|user.is_none()), "PANIC: No predecessor of the loop header containing a cloning write has no fringe user of that write. This shouldn't be possible!"); + + // Induce the clone. + editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the phi. + let phi_node = edit.add_node(Node::Phi { + control: top_block, + data: fringe_user_per_pred + .into_iter() + .map(|user| { + if let Some(user) = user { + user + } else { + cons_node + } + }) + .collect(), + }); + + // Create the clone into the phi. + let clone_node = edit.add_node(Node::Write { + collect: phi_node, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + } + }; for bb in rev_po.iter().rev() { let insts = &bbs.1[bb.idx()]; // Accumulate predecessor bottom used sets for phis. Phis are special in @@ -744,8 +852,8 @@ fn materialize_clones( let bottom = &lattice[bb_to_prefix_sum[pred.idx()] + last_pt]; bottom.clone() }); - if let Some(other_users) = bottom.get(&arg) { - induce_clone(editor, typing, *arg, *inst, other_users); + if bottom.contains_key(&arg) { + induce_clone(*arg, *inst, bottom); return true; } else { // Subsequent phis using `arg` along the same @@ -760,12 +868,12 @@ fn materialize_clones( second, third, } => { - if let Some(other_users) = value.get(&second) { - induce_clone(editor, typing, second, *inst, other_users); + if value.contains_key(&second) { + induce_clone(second, *inst, &value); return true; } - if let Some(other_users) = value.get(&third) { - induce_clone(editor, typing, third, *inst, other_users); + if value.contains_key(&third) { + induce_clone(third, *inst, &value); return true; } } @@ -774,8 +882,8 @@ fn materialize_clones( init, reduct: _, } => { - if let Some(other_users) = value.get(&init) { - induce_clone(editor, typing, init, *inst, other_users); + if value.contains_key(&init) { + induce_clone(init, *inst, &value); return true; } } @@ -783,8 +891,8 @@ fn materialize_clones( collect, indices: _, } if !objects[&func_id].objects(*inst).is_empty() => { - if let Some(other_users) = value.get(&collect) { - induce_clone(editor, typing, collect, *inst, other_users); + if value.contains_key(&collect) { + induce_clone(collect, *inst, &value); return true; } } @@ -793,8 +901,8 @@ fn materialize_clones( data: _, indices: _, } => { - if let Some(other_users) = value.get(&collect) { - induce_clone(editor, typing, collect, *inst, other_users); + if value.contains_key(&collect) { + induce_clone(collect, *inst, &value); return true; } } @@ -813,9 +921,9 @@ fn materialize_clones( || callee_objects.returned_objects().contains(&object) }) .unwrap_or(false) - && let Some(other_users) = value.get(arg) + && value.contains_key(arg) { - induce_clone(editor, typing, *arg, *inst, other_users); + induce_clone(*arg, *inst, value); return true; } } @@ -826,35 +934,3 @@ fn materialize_clones( } false } - -/* - * Utility to insert a clone before a use of a collection. - */ -fn induce_clone( - editor: &mut FunctionEditor, - typing: &Vec<TypeID>, - object: NodeID, - user: NodeID, - other_users: &BTreeSet<NodeID>, -) { - if other_users.contains(&user) { - editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); - - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), - }); - - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); - } else { - todo!() - } -} diff --git a/juno_frontend/src/lib.rs b/juno_frontend/src/lib.rs index 0dd5cdd3..075c7a45 100644 --- a/juno_frontend/src/lib.rs +++ b/juno_frontend/src/lib.rs @@ -201,6 +201,8 @@ pub fn compile_ir( pm.add_pass(hercules_opt::pass::Pass::Xdot(true)); } + add_pass!(pm, verify, LegalizeReferenceSemantics); + add_verified_pass!(pm, verify, DCE); add_pass!(pm, verify, LegalizeReferenceSemantics); pm.add_pass(hercules_opt::pass::Pass::Codegen(output_dir, module_name)); pm.run_passes(); -- GitLab From 7041352d9e4a45e044ae9a244292ccb23567507c Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 6 Jan 2025 22:35:42 -0800 Subject: [PATCH 04/14] Fix how top block is picked --- hercules_opt/src/legalize_reference_semantics.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index ae5b5c3f..3ee393c0 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -73,7 +73,7 @@ pub fn legalize_reference_semantics( return None; } }; - if materialize_clones(editor, typing, control_subgraph, dom, objects, &bbs) { + if materialize_clones(editor, typing, control_subgraph, dom, loops, objects, &bbs) { None } else { Some(bbs) @@ -534,6 +534,7 @@ fn materialize_clones( typing: &Vec<TypeID>, control_subgraph: &Subgraph, dom: &DomTree, + loops: &LoopTree, objects: &CollectionObjects, bbs: &BasicBlocks, ) -> bool { @@ -767,18 +768,21 @@ fn materialize_clones( .filter(|user| !value.contains_key(&user)) .collect(); - // Figure out where to place that phi. This is the shallowest - // node in the domtree where `user` is responsible for making - // `object` used at the top of the block, and the block - // dominates the block containing `user`. + // Figure out where to place that phi. This is the deepest + // loop header where `user` is responsible for making `object` used + // used at the top of the block, and the block dominates the block + // containing `user`. let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { lattice[bb_to_prefix_sum[bb.idx()]] .get(&object) .unwrap_or(&BTreeSet::new()) .contains(&user) && dom.does_dom(*bb, bbs.0[user.idx()]) + && loops.contains(*bb) }); - let top_block = dom.shallowest_amongst(eligible_blocks); + let top_block = eligible_blocks + .max_by_key(|bb| loops.nesting(*bb).unwrap()) + .unwrap(); assert!(editor.func().nodes[top_block.idx()].is_region()); // Figure out which fringe users, if any, dominate each -- GitLab From b8dcc346362e83a479e1eac12b217509374f2c54 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 6 Jan 2025 22:57:34 -0800 Subject: [PATCH 05/14] Fixes for double-loop clone --- hercules_opt/src/legalize_reference_semantics.rs | 10 ++++++++-- juno_samples/implicit_clone/src/implicit_clone.jn | 13 +++++++++++++ juno_samples/implicit_clone/src/main.rs | 4 ++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 3ee393c0..b124aecf 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -222,6 +222,7 @@ fn basic_blocks( || root_block_iterated_users.contains(&mutator)) && mutating_objects(function, func_id, *mutator, objects) .any(|mutated| read_objs.contains(&mutated)) + && id != mutator { antideps.insert((*id, *mutator)); } @@ -757,7 +758,10 @@ fn materialize_clones( .get(&pop) .map(|users| users.into_iter()) .into_iter() - .flatten(), + .flatten() + .filter(|iterated_user| { + dom.does_dom(bbs.0[user.idx()], bbs.0[iterated_user.idx()]) + }), ); } @@ -771,7 +775,8 @@ fn materialize_clones( // Figure out where to place that phi. This is the deepest // loop header where `user` is responsible for making `object` used // used at the top of the block, and the block dominates the block - // containing `user`. + // containing `user`. If `user` is a phi, then the region it's + // attached to is excluded from eligibility. let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { lattice[bb_to_prefix_sum[bb.idx()]] .get(&object) @@ -779,6 +784,7 @@ fn materialize_clones( .contains(&user) && dom.does_dom(*bb, bbs.0[user.idx()]) && loops.contains(*bb) + && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) }); let top_block = eligible_blocks .max_by_key(|bb| loops.nesting(*bb).unwrap()) diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index a2d6cba0..a4a28559 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -27,6 +27,19 @@ fn loop_implicit_clone(input : i32) -> i32 { return r + 7; } +#[entry] +fn double_loop_implicit_clone(a : usize, b : usize) -> i32 { + let x = 0; + for j = 0 to 2 { + for i = 0 to 5 { + let arr : i32[3]; + arr[a] += 7; + x += arr[b]; + } + } + return x / 2; +} + #[entry] fn no_implicit_clone(input : i32) -> i32 { let arr : i32[2]; diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index 45c722d7..036e4edb 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ b/juno_samples/implicit_clone/src/main.rs @@ -15,6 +15,10 @@ fn main() { println!("{}", output); assert_eq!(output, 7); + let output = double_loop_implicit_clone(2, 2).await; + println!("{}", output); + assert_eq!(output, 35); + let output = no_implicit_clone(4).await; println!("{}", output); assert_eq!(output, 13); -- GitLab From 061a191e46f5af7e896153522dbd155ba9deca8f Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 10:57:10 -0800 Subject: [PATCH 06/14] Fix for more complex loops --- .../src/legalize_reference_semantics.rs | 57 ++++++++++--------- .../implicit_clone/src/implicit_clone.jn | 6 +- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index b124aecf..6d41c773 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -746,32 +746,6 @@ fn materialize_clones( edit.replace_all_uses_where(object, clone_node, |id| *id == user) }); } else { - // Figure out the downstream users of `user` that we need to phi - // back upwards. - let mut user_iterated_users: BTreeSet<NodeID> = BTreeSet::new(); - let mut workset = BTreeSet::new(); - workset.insert(user); - while let Some(pop) = workset.pop_first() { - user_iterated_users.insert(pop); - workset.extend( - value - .get(&pop) - .map(|users| users.into_iter()) - .into_iter() - .flatten() - .filter(|iterated_user| { - dom.does_dom(bbs.0[user.idx()], bbs.0[iterated_user.idx()]) - }), - ); - } - - // Figure out which downstream users aren't themselves used - - // these are the nodes that get cycled through the phi. - let fringe_users: BTreeSet<NodeID> = user_iterated_users - .into_iter() - .filter(|user| !value.contains_key(&user)) - .collect(); - // Figure out where to place that phi. This is the deepest // loop header where `user` is responsible for making `object` used // used at the top of the block, and the block dominates the block @@ -791,6 +765,33 @@ fn materialize_clones( .unwrap(); assert!(editor.func().nodes[top_block.idx()].is_region()); + // `user` may just be the canary in the coal mine - there may be + // multiple users of `object` in this loop. Find all of them. + let all_loop_users: BTreeSet<_> = value[&object] + .iter() + .map(|id| *id) + .filter(|user| loops.is_in_loop(top_block, bbs.0[user.idx()])) + .collect(); + + // Figure out the fringe users of the users of `object` that we need + // to phi back upwards. + let mut fringe_users: BTreeSet<NodeID> = BTreeSet::new(); + let mut workset = all_loop_users; + while let Some(pop) = workset.pop_first() { + let iterated_users: BTreeSet<_> = value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten() + .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) + .collect(); + if iterated_users.is_empty() { + fringe_users.insert(pop); + } else { + workset.extend(iterated_users); + } + } + // Figure out which fringe users, if any, dominate each // predecessor of the top block. If none dominate a predecessor, // then the input for that predecessor is the new collection @@ -836,7 +837,9 @@ fn materialize_clones( }); // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) + edit.replace_all_uses_where(object, clone_node, |id| { + id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) + }) }); } }; diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index a4a28559..a575ae59 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -33,7 +33,11 @@ fn double_loop_implicit_clone(a : usize, b : usize) -> i32 { for j = 0 to 2 { for i = 0 to 5 { let arr : i32[3]; - arr[a] += 7; + if a == b { + arr[a] += 7; + } else { + arr[a] += 1; + } x += arr[b]; } } -- GitLab From 97f5bb6dc749c46f5b12aaa8835e7e0c9e9d44a7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 13:46:35 -0800 Subject: [PATCH 07/14] More complex loops are broken --- .../src/legalize_reference_semantics.rs | 184 ++++++++++++++---- .../implicit_clone/src/implicit_clone.jn | 18 +- juno_samples/implicit_clone/src/main.rs | 6 +- 3 files changed, 168 insertions(+), 40 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 6d41c773..5e80ec73 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -718,18 +718,13 @@ fn materialize_clones( } } - // Now that we've computed the used collections dataflow analysis, use the - // results to materialize a clone whenever a node attempts to use an already - // used node. As soon as any clone is found, return since that clone needs - // to get placed before other clones can be discovered. Traverse blocks in - // postorder so that clones inside loops are discovered before loop-induced - // clones. + // Helper to induce a clone when an implicit clone is identified. let nodes = nodes.clone(); let mut induce_clone = |object: NodeID, user: NodeID, value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { if !value[&object].contains(&user) { - editor.edit(|mut edit| { + let success = editor.edit(|mut edit| { // Create the constant collection object for allocation. let object_ty = typing[object.idx()]; let object_cons = edit.add_zero_constant(object_ty); @@ -745,6 +740,7 @@ fn materialize_clones( // Make user use the cloned object. edit.replace_all_uses_where(object, clone_node, |id| *id == user) }); + assert!(success); } else { // Figure out where to place that phi. This is the deepest // loop header where `user` is responsible for making `object` used @@ -758,6 +754,7 @@ fn materialize_clones( .contains(&user) && dom.does_dom(*bb, bbs.0[user.idx()]) && loops.contains(*bb) + && loops.is_in_loop(*bb, bbs.0[user.idx()]) && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) }); let top_block = eligible_blocks @@ -765,50 +762,149 @@ fn materialize_clones( .unwrap(); assert!(editor.func().nodes[top_block.idx()].is_region()); - // `user` may just be the canary in the coal mine - there may be - // multiple users of `object` in this loop. Find all of them. - let all_loop_users: BTreeSet<_> = value[&object] - .iter() - .map(|id| *id) - .filter(|user| loops.is_in_loop(top_block, bbs.0[user.idx()])) - .collect(); - - // Figure out the fringe users of the users of `object` that we need - // to phi back upwards. - let mut fringe_users: BTreeSet<NodeID> = BTreeSet::new(); - let mut workset = all_loop_users; + // Figure out the users of `object` that we need to phi back + // upwards. Assign each user a number indicating how far down the + // user chain it is, higher is farther down. This is used for + // picking the most downstream user later. + let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); + let mut workset = BTreeSet::new(); + workset.insert(object); + let mut chain_ordering = 1; while let Some(pop) = workset.pop_first() { let iterated_users: BTreeSet<_> = value .get(&pop) .map(|users| users.into_iter()) .into_iter() .flatten() + .map(|id| *id) .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) .collect(); - if iterated_users.is_empty() { - fringe_users.insert(pop); - } else { - workset.extend(iterated_users); + workset.extend(&iterated_users); + for user in iterated_users { + *users.entry(user).or_default() = chain_ordering; + chain_ordering += 1; } } - // Figure out which fringe users, if any, dominate each - // predecessor of the top block. If none dominate a predecessor, - // then the input for that predecessor is the new collection - // constant. - let mut fringe_user_per_pred = vec![None; control_subgraph.preds(top_block).count()]; - for fringe_user in fringe_users { - for (idx, pred) in control_subgraph.preds(top_block).enumerate() { - if dom.does_dom(bbs.0[fringe_user.idx()], pred) { - assert!(fringe_user_per_pred[idx].is_none(), "PANIC: Multiple fringe users of a cloning write in a loop dominate the same predecessor of the loop header. This shouldn't be possible!"); - fringe_user_per_pred[idx] = Some(fringe_user); + // The fringe users may not dominate any predecessors of the loop + // header. The following is some Juno code that exposes this: + // + // fn problematic(a : size) -> i32 { + // for i = 0 to a { + // let arr : i32[1]; + // for j = 0 to a { + // arr[0] = 1; + // } + // } + // return 0; + // } + // + // Note that `arr` induces a clone each iteration, since its value + // needs to be reset to all zeros. However, it should also be noted + // that the most fringe user of `arr`, the write inside the inner + // loop, does not dominate the bottom of the outer loop. Thus, we + // need to insert a phi in the bottom block of the outer loop to + // retrieve either the write, or `arr` before the inner loop. Create + // a dummy phi that will be replaced with the real phi we create + // later. This is needed for finding downstream users below. + let mut dummy_phi = NodeID::new(0); + let success = editor.edit(|mut edit| { + dummy_phi = edit.add_node(Node::Phi { + control: top_block, + data: empty().collect(), + }); + Ok(edit) + }); + assert!(success); + users.insert(dummy_phi, 0); + + // Figure out which downstream user is phi-ed back up on each + // predecessor. For a predecessor not in the loop, no downstream + // user is phi-ed back up, rather the newly allocation collection + // is used. + let mut downstream_user_per_pred = + vec![None; control_subgraph.preds(top_block).count()]; + let mut added_phi_bbs: BTreeMap<NodeID, NodeID> = BTreeMap::new(); + added_phi_bbs.insert(dummy_phi, top_block); + let lowest_dominating_user = + |bb: NodeID, + users: &BTreeMap<NodeID, usize>, + added_phi_bbs: &BTreeMap<NodeID, NodeID>| { + users + .iter() + .filter(|(user, _)| { + let user_bb = added_phi_bbs + .get(user) + .unwrap_or_else(|| &bbs.0[user.idx()]); + dom.does_dom(*user_bb, bb) + }) + .max_by_key(|(_, order)| *order) + .map(|(user, _)| *user) + }; + for (idx, pred) in control_subgraph.preds(top_block).enumerate() { + if !loops.is_in_loop(top_block, pred) { + continue; + } + + // Figure out which, if any, downstream user dominates this + // predecessor. If none do, traverse predecessors until reaching + // a region, and figure out which downstream users dominate the + // region - create a phi joining those users, and the phi now + // dominates the predecessor. This is recursive. Created phis + // get added to `users`, so that duplicate phis aren't made. The + // basic blocks of the added phis are also tracked, since they + // are needed for domination checking. + let mut downstream_user = NodeID::new(0); + let mut worklist = VecDeque::from(vec![pred]); + while let Some(bb) = worklist.pop_front() { + if let Some(lowest_dominating_user) = + lowest_dominating_user(bb, &users, &added_phi_bbs) + { + // If there is a dominating user of this block, then + // nothing needs to be added - record this as the + // downstream user, and the last assignment of this form + // will be from the immediate predecessor of the loop + // header. + downstream_user = lowest_dominating_user; + } else if let Some(preds_lowest_dominating_users) = control_subgraph + .preds(bb) + .map(|pred| lowest_dominating_user(pred, &users, &added_phi_bbs)) + .collect::<Option<Vec<_>>>() + { + // If there isn't a dominating user of this block yet, + // but each predecessor has a dominating user, create a + // phi over those users. + assert!(preds_lowest_dominating_users.len() > 1); + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: preds_lowest_dominating_users.into_boxed_slice(), + }); + added_phi_bbs.insert(phi_node, bb); + users.insert(phi_node, chain_ordering); + chain_ordering += 1; + downstream_user = phi_node; + Ok(edit) + }); + assert!(success); + } else { + // If the predecessors don't have dominating users yet, + // push them to the stack. Also push ourselves back on + // the stack to be revisited. + for pred in control_subgraph.preds(bb) { + assert!(loops.is_in_loop(top_block, pred)); + worklist.push_back(pred); + } + worklist.push_back(bb); } } + downstream_user_per_pred[idx] = Some(downstream_user); } - assert!(fringe_user_per_pred.iter().any(|user|user.is_none()), "PANIC: No predecessor of the loop header containing a cloning write has no fringe user of that write. This shouldn't be possible!"); + assert!(downstream_user_per_pred.iter().any(|user|user.is_none()), "PANIC: No predecessor of the loop header containing a cloning write has no downstream user of that write. This shouldn't be possible!"); + assert!(downstream_user_per_pred.iter().any(|user|user.is_some()), "PANIC: No predecessor of the loop header containing a cloning write has a downstream user of that write. This shouldn't be possible!"); // Induce the clone. - editor.edit(|mut edit| { + let success = editor.edit(|mut edit| { // Create the constant collection object for allocation. let object_ty = typing[object.idx()]; let object_cons = edit.add_zero_constant(object_ty); @@ -817,7 +913,7 @@ fn materialize_clones( // Create the phi. let phi_node = edit.add_node(Node::Phi { control: top_block, - data: fringe_user_per_pred + data: downstream_user_per_pred .into_iter() .map(|user| { if let Some(user) = user { @@ -837,12 +933,24 @@ fn materialize_clones( }); // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| { + let edit = edit.replace_all_uses_where(object, clone_node, |id| { id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) - }) + })?; + + // Get rid of the dummy phi. + let edit = edit.replace_all_uses(dummy_phi, phi_node)?; + edit.delete_node(dummy_phi) }); + assert!(success); } }; + + // Now that we've computed the used collections dataflow analysis, use the + // results to materialize a clone whenever a node attempts to use an already + // used node. As soon as any clone is found, return since that clone needs + // to get placed before other clones can be discovered. Traverse blocks in + // postorder so that clones inside loops are discovered before loop-induced + // clones. for bb in rev_po.iter().rev() { let insts = &bbs.1[bb.idx()]; // Accumulate predecessor bottom used sets for phis. Phis are special in diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index a575ae59..7017942c 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -27,21 +27,37 @@ fn loop_implicit_clone(input : i32) -> i32 { return r + 7; } +#[entry] +fn tricky_loop_implicit_clone(a : usize) -> i32 { + for i = 0 to a { + let arr : i32[1]; + for j = 0 to a { + arr[0] = 1; + } + } + return 42; +} + #[entry] fn double_loop_implicit_clone(a : usize, b : usize) -> i32 { let x = 0; for j = 0 to 2 { for i = 0 to 5 { let arr : i32[3]; + //let arr2 : i32[1]; if a == b { arr[a] += 7; } else { arr[a] += 1; } + for k = 0 to (a + b - 1) { + arr[a] += 2; + //arr2[0] += 1; + } x += arr[b]; } } - return x / 2; + return x; } #[entry] diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index 036e4edb..508de775 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ b/juno_samples/implicit_clone/src/main.rs @@ -15,9 +15,13 @@ fn main() { println!("{}", output); assert_eq!(output, 7); + let output = tricky_loop_implicit_clone(3).await; + println!("{}", output); + assert_eq!(output, 42); + let output = double_loop_implicit_clone(2, 2).await; println!("{}", output); - assert_eq!(output, 35); + assert_eq!(output, 130); let output = no_implicit_clone(4).await; println!("{}", output); -- GitLab From 734c6183c663d5ef56189f446f72ac8576c403ec Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 15:41:38 -0800 Subject: [PATCH 08/14] Fix loop induced clones --- .../src/legalize_reference_semantics.rs | 184 +++++++----------- 1 file changed, 71 insertions(+), 113 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 5e80ec73..21a42056 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -804,104 +804,60 @@ fn materialize_clones( // that the most fringe user of `arr`, the write inside the inner // loop, does not dominate the bottom of the outer loop. Thus, we // need to insert a phi in the bottom block of the outer loop to - // retrieve either the write, or `arr` before the inner loop. Create - // a dummy phi that will be replaced with the real phi we create - // later. This is needed for finding downstream users below. - let mut dummy_phi = NodeID::new(0); - let success = editor.edit(|mut edit| { - dummy_phi = edit.add_node(Node::Phi { - control: top_block, - data: empty().collect(), - }); - Ok(edit) - }); - assert!(success); - users.insert(dummy_phi, 0); - - // Figure out which downstream user is phi-ed back up on each - // predecessor. For a predecessor not in the loop, no downstream - // user is phi-ed back up, rather the newly allocation collection - // is used. - let mut downstream_user_per_pred = - vec![None; control_subgraph.preds(top_block).count()]; - let mut added_phi_bbs: BTreeMap<NodeID, NodeID> = BTreeMap::new(); - added_phi_bbs.insert(dummy_phi, top_block); - let lowest_dominating_user = - |bb: NodeID, - users: &BTreeMap<NodeID, usize>, - added_phi_bbs: &BTreeMap<NodeID, NodeID>| { - users - .iter() - .filter(|(user, _)| { - let user_bb = added_phi_bbs - .get(user) - .unwrap_or_else(|| &bbs.0[user.idx()]); - dom.does_dom(*user_bb, bb) - }) - .max_by_key(|(_, order)| *order) - .map(|(user, _)| *user) - }; - for (idx, pred) in control_subgraph.preds(top_block).enumerate() { - if !loops.is_in_loop(top_block, pred) { - continue; + // retrieve either the write, or `arr` before the inner loop. The + // general version of this problem requires the following solution. + // Our goal is to figure out which downstream user represents + // `object` at each block in the loop. We first assign each block + // containing a user the most downstream user it contains. Then, we + // create a dummy phi for every region (including the header) in the + // loop, which is the downstream user for that block. Then, every + // other block is assigned the downstream user of its single + // predecessor. This basically amounts to recreating SSA for + // `object` inside the loop. + let mut user_per_loop_bb = BTreeMap::new(); + let mut added_phis = BTreeMap::new(); + let mut top_phi = NodeID::new(0); + // Assign existing users. + for (user, ordering) in users.iter() { + if let Some(old_user) = user_per_loop_bb.get(&bbs.0[user.idx()]) + && users[old_user] > *ordering + { + } else { + user_per_loop_bb.insert(bbs.0[user.idx()], *user); } - - // Figure out which, if any, downstream user dominates this - // predecessor. If none do, traverse predecessors until reaching - // a region, and figure out which downstream users dominate the - // region - create a phi joining those users, and the phi now - // dominates the predecessor. This is recursive. Created phis - // get added to `users`, so that duplicate phis aren't made. The - // basic blocks of the added phis are also tracked, since they - // are needed for domination checking. - let mut downstream_user = NodeID::new(0); - let mut worklist = VecDeque::from(vec![pred]); - while let Some(bb) = worklist.pop_front() { - if let Some(lowest_dominating_user) = - lowest_dominating_user(bb, &users, &added_phi_bbs) - { - // If there is a dominating user of this block, then - // nothing needs to be added - record this as the - // downstream user, and the last assignment of this form - // will be from the immediate predecessor of the loop - // header. - downstream_user = lowest_dominating_user; - } else if let Some(preds_lowest_dominating_users) = control_subgraph - .preds(bb) - .map(|pred| lowest_dominating_user(pred, &users, &added_phi_bbs)) - .collect::<Option<Vec<_>>>() - { - // If there isn't a dominating user of this block yet, - // but each predecessor has a dominating user, create a - // phi over those users. - assert!(preds_lowest_dominating_users.len() > 1); - let success = editor.edit(|mut edit| { - let phi_node = edit.add_node(Node::Phi { - control: bb, - data: preds_lowest_dominating_users.into_boxed_slice(), - }); - added_phi_bbs.insert(phi_node, bb); - users.insert(phi_node, chain_ordering); - chain_ordering += 1; - downstream_user = phi_node; - Ok(edit) + } + // Assign dummy phis. + for bb in loops.nodes_in_loop(top_block) { + if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) + && editor.func().nodes[bb.idx()].is_region() + { + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: empty().collect(), }); - assert!(success); - } else { - // If the predecessors don't have dominating users yet, - // push them to the stack. Also push ourselves back on - // the stack to be revisited. - for pred in control_subgraph.preds(bb) { - assert!(loops.is_in_loop(top_block, pred)); - worklist.push_back(pred); + if bb != top_block || !user_per_loop_bb.contains_key(&bb) { + user_per_loop_bb.insert(bb, phi_node); } - worklist.push_back(bb); - } + if bb == top_block { + top_phi = phi_node; + } + added_phis.insert(phi_node, bb); + Ok(edit) + }); + assert!(success); + } + } + // Assign users for the rest of the blocks. + for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { + if !user_per_loop_bb.contains_key(&bb) { + assert!(control_subgraph.preds(*bb).count() == 1); + user_per_loop_bb.insert( + *bb, + user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], + ); } - downstream_user_per_pred[idx] = Some(downstream_user); } - assert!(downstream_user_per_pred.iter().any(|user|user.is_none()), "PANIC: No predecessor of the loop header containing a cloning write has no downstream user of that write. This shouldn't be possible!"); - assert!(downstream_user_per_pred.iter().any(|user|user.is_some()), "PANIC: No predecessor of the loop header containing a cloning write has a downstream user of that write. This shouldn't be possible!"); // Induce the clone. let success = editor.edit(|mut edit| { @@ -910,36 +866,38 @@ fn materialize_clones( let object_cons = edit.add_zero_constant(object_ty); let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Create the phi. - let phi_node = edit.add_node(Node::Phi { - control: top_block, - data: downstream_user_per_pred - .into_iter() - .map(|user| { - if let Some(user) = user { - user - } else { - cons_node - } - }) - .collect(), - }); - // Create the clone into the phi. let clone_node = edit.add_node(Node::Write { - collect: phi_node, + collect: top_phi, data: object, indices: vec![].into_boxed_slice(), }); + // Create the phis. + let mut real_phis = BTreeMap::new(); + for (dummy, bb) in added_phis { + let real = edit.add_node(Node::Phi { + control: bb, + data: control_subgraph + .preds(bb) + .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) + .collect(), + }); + real_phis.insert(dummy, real); + } + // Make user use the cloned object. - let edit = edit.replace_all_uses_where(object, clone_node, |id| { + edit = edit.replace_all_uses_where(object, clone_node, |id| { id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) })?; - // Get rid of the dummy phi. - let edit = edit.replace_all_uses(dummy_phi, phi_node)?; - edit.delete_node(dummy_phi) + // Get rid of the dummy phis. + for (dummy, real) in real_phis { + edit = edit.replace_all_uses(dummy, real)?; + edit = edit.delete_node(dummy)?; + } + + Ok(edit) }); assert!(success); } -- GitLab From 371c2df209ee07c98f1f4023fb71a1f138cb3981 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 16:47:42 -0800 Subject: [PATCH 09/14] DCE is needed --- hercules_opt/src/dce.rs | 3 +-- hercules_opt/src/legalize_reference_semantics.rs | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hercules_opt/src/dce.rs b/hercules_opt/src/dce.rs index 75268694..839e9ad1 100644 --- a/hercules_opt/src/dce.rs +++ b/hercules_opt/src/dce.rs @@ -6,8 +6,7 @@ use self::hercules_ir::ir::*; use crate::*; /* - * Top level function to run dead code elimination. Deletes nodes by setting - * nodes to gravestones. Works with a function already containing gravestones. + * Top level function to run dead code elimination. */ pub fn dce(editor: &mut FunctionEditor) { // Create worklist (starts as all nodes). diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 21a42056..4c467b4f 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -900,6 +900,8 @@ fn materialize_clones( Ok(edit) }); assert!(success); + + dce(editor); } }; -- GitLab From 45a630a1d86d971f2864e33dd50c62b2df59d885 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 17:34:11 -0800 Subject: [PATCH 10/14] More fixes --- .../src/legalize_reference_semantics.rs | 347 +++++++++--------- 1 file changed, 178 insertions(+), 169 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index 4c467b4f..c4f58745 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -717,193 +717,202 @@ fn materialize_clones( break; } } + let mut super_value = BTreeMap::new(); + for value in lattice.iter() { + meet(&mut super_value, value); + } // Helper to induce a clone when an implicit clone is identified. let nodes = nodes.clone(); - let mut induce_clone = |object: NodeID, - user: NodeID, - value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { - if !value[&object].contains(&user) { - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); - - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), + let mut induce_clone = + |object: NodeID, user: NodeID, value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + // If `user` already used `object` and tries to use it again, then the + // clone is a "loop induced" clone. Otherwise, it's a simple clone. + if !value[&object].contains(&user) { + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); + + // Create the clone into the new constant collection. + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), + }); + + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + assert!(success); + } else { + // Figure out where to place that phi. This is the deepest + // loop header where `user` is responsible for making `object` used + // used at the top of the block, and the block dominates the block + // containing `user`. If `user` is a phi, then the region it's + // attached to is excluded from eligibility. + let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { + lattice[bb_to_prefix_sum[bb.idx()]] + .get(&object) + .unwrap_or(&BTreeSet::new()) + .contains(&user) + && dom.does_dom(*bb, bbs.0[user.idx()]) + && loops.contains(*bb) + && loops.is_in_loop(*bb, bbs.0[user.idx()]) + && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) }); + let top_block = eligible_blocks + .max_by_key(|bb| loops.nesting(*bb).unwrap()) + .unwrap(); + assert!(editor.func().nodes[top_block.idx()].is_region()); - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); - assert!(success); - } else { - // Figure out where to place that phi. This is the deepest - // loop header where `user` is responsible for making `object` used - // used at the top of the block, and the block dominates the block - // containing `user`. If `user` is a phi, then the region it's - // attached to is excluded from eligibility. - let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { - lattice[bb_to_prefix_sum[bb.idx()]] - .get(&object) - .unwrap_or(&BTreeSet::new()) - .contains(&user) - && dom.does_dom(*bb, bbs.0[user.idx()]) - && loops.contains(*bb) - && loops.is_in_loop(*bb, bbs.0[user.idx()]) - && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) - }); - let top_block = eligible_blocks - .max_by_key(|bb| loops.nesting(*bb).unwrap()) - .unwrap(); - assert!(editor.func().nodes[top_block.idx()].is_region()); - - // Figure out the users of `object` that we need to phi back - // upwards. Assign each user a number indicating how far down the - // user chain it is, higher is farther down. This is used for - // picking the most downstream user later. - let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); - let mut workset = BTreeSet::new(); - workset.insert(object); - let mut chain_ordering = 1; - while let Some(pop) = workset.pop_first() { - let iterated_users: BTreeSet<_> = value - .get(&pop) - .map(|users| users.into_iter()) - .into_iter() - .flatten() - .map(|id| *id) - .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) - .collect(); - workset.extend(&iterated_users); - for user in iterated_users { - *users.entry(user).or_default() = chain_ordering; - chain_ordering += 1; + // Figure out the users of `object` that we need to phi back + // upwards. Assign each user a number indicating how far down the + // user chain it is, higher is farther down. This is used for + // picking the most downstream user later. + let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); + let mut workset: BTreeSet<NodeID> = BTreeSet::new(); + workset.insert(object); + let mut chain_ordering = 1; + while let Some(pop) = workset.pop_first() { + let iterated_users: BTreeSet<_> = super_value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten() + .map(|id| *id) + .filter(|iterated_user| { + loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]) + && dom.does_dom(bbs.0[pop.idx()], bbs.0[iterated_user.idx()]) + }) + .collect(); + workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); + for user in iterated_users { + *users.entry(user).or_default() = chain_ordering; + chain_ordering += 1; + } } - } - // The fringe users may not dominate any predecessors of the loop - // header. The following is some Juno code that exposes this: - // - // fn problematic(a : size) -> i32 { - // for i = 0 to a { - // let arr : i32[1]; - // for j = 0 to a { - // arr[0] = 1; - // } - // } - // return 0; - // } - // - // Note that `arr` induces a clone each iteration, since its value - // needs to be reset to all zeros. However, it should also be noted - // that the most fringe user of `arr`, the write inside the inner - // loop, does not dominate the bottom of the outer loop. Thus, we - // need to insert a phi in the bottom block of the outer loop to - // retrieve either the write, or `arr` before the inner loop. The - // general version of this problem requires the following solution. - // Our goal is to figure out which downstream user represents - // `object` at each block in the loop. We first assign each block - // containing a user the most downstream user it contains. Then, we - // create a dummy phi for every region (including the header) in the - // loop, which is the downstream user for that block. Then, every - // other block is assigned the downstream user of its single - // predecessor. This basically amounts to recreating SSA for - // `object` inside the loop. - let mut user_per_loop_bb = BTreeMap::new(); - let mut added_phis = BTreeMap::new(); - let mut top_phi = NodeID::new(0); - // Assign existing users. - for (user, ordering) in users.iter() { - if let Some(old_user) = user_per_loop_bb.get(&bbs.0[user.idx()]) - && users[old_user] > *ordering - { - } else { - user_per_loop_bb.insert(bbs.0[user.idx()], *user); + // The fringe users may not dominate any predecessors of the loop + // header. The following is some Juno code that exposes this: + // + // fn problematic(a : size) -> i32 { + // for i = 0 to a { + // let arr : i32[1]; + // for j = 0 to a { + // arr[0] = 1; + // } + // } + // return 0; + // } + // + // Note that `arr` induces a clone each iteration, since its value + // needs to be reset to all zeros. However, it should also be noted + // that the most fringe user of `arr`, the write inside the inner + // loop, does not dominate the bottom of the outer loop. Thus, we + // need to insert a phi in the bottom block of the outer loop to + // retrieve either the write, or `arr` before the inner loop. The + // general version of this problem requires the following solution. + // Our goal is to figure out which downstream user represents + // `object` at each block in the loop. We first assign each block + // containing a user the most downstream user it contains. Then, we + // create a dummy phi for every region (including the header) in the + // loop, which is the downstream user for that block. Then, every + // other block is assigned the downstream user of its single + // predecessor. This basically amounts to recreating SSA for + // `object` inside the loop. + let mut user_per_loop_bb = BTreeMap::new(); + let mut added_phis = BTreeMap::new(); + let mut top_phi = NodeID::new(0); + // Assign existing users. + for (user, ordering) in users.iter() { + let bb = bbs.0[user.idx()]; + if let Some(old_user) = user_per_loop_bb.get(&bb) + && users[old_user] > *ordering + { + } else { + user_per_loop_bb.insert(bb, *user); + } } - } - // Assign dummy phis. - for bb in loops.nodes_in_loop(top_block) { - if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) - && editor.func().nodes[bb.idx()].is_region() - { - let success = editor.edit(|mut edit| { - let phi_node = edit.add_node(Node::Phi { - control: bb, - data: empty().collect(), + // Assign dummy phis. + for bb in loops.nodes_in_loop(top_block) { + if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) + && editor.func().nodes[bb.idx()].is_region() + { + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: empty().collect(), + }); + if bb != top_block || !user_per_loop_bb.contains_key(&bb) { + user_per_loop_bb.insert(bb, phi_node); + } + if bb == top_block { + top_phi = phi_node; + } + added_phis.insert(phi_node, bb); + Ok(edit) }); - if bb != top_block || !user_per_loop_bb.contains_key(&bb) { - user_per_loop_bb.insert(bb, phi_node); - } - if bb == top_block { - top_phi = phi_node; - } - added_phis.insert(phi_node, bb); - Ok(edit) - }); - assert!(success); + assert!(success); + } } - } - // Assign users for the rest of the blocks. - for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { - if !user_per_loop_bb.contains_key(&bb) { - assert!(control_subgraph.preds(*bb).count() == 1); - user_per_loop_bb.insert( - *bb, - user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], - ); + // Assign users for the rest of the blocks. + for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { + if !user_per_loop_bb.contains_key(&bb) { + assert!(control_subgraph.preds(*bb).count() == 1); + user_per_loop_bb.insert( + *bb, + user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], + ); + } } - } - // Induce the clone. - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); - - // Create the clone into the phi. - let clone_node = edit.add_node(Node::Write { - collect: top_phi, - data: object, - indices: vec![].into_boxed_slice(), - }); + // Induce the clone. + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Create the phis. - let mut real_phis = BTreeMap::new(); - for (dummy, bb) in added_phis { - let real = edit.add_node(Node::Phi { - control: bb, - data: control_subgraph - .preds(bb) - .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) - .collect(), + // Create the phis. + let mut real_phis = BTreeMap::new(); + for (dummy, bb) in added_phis { + let real = edit.add_node(Node::Phi { + control: bb, + data: control_subgraph + .preds(bb) + .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) + .collect(), + }); + real_phis.insert(dummy, real); + } + + // Create the clone into the phi. + let clone_node = edit.add_node(Node::Write { + collect: real_phis[&top_phi], + data: object, + indices: vec![].into_boxed_slice(), }); - real_phis.insert(dummy, real); - } - // Make user use the cloned object. - edit = edit.replace_all_uses_where(object, clone_node, |id| { - id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) - })?; + // Make user use the cloned object. + edit = edit.replace_all_uses_where(object, clone_node, |id| { + id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) + })?; - // Get rid of the dummy phis. - for (dummy, real) in real_phis { - edit = edit.replace_all_uses(dummy, real)?; - edit = edit.delete_node(dummy)?; - } + // Get rid of the dummy phis. + for (dummy, real) in real_phis { + edit = edit.replace_all_uses(dummy, real)?; + edit = edit.delete_node(dummy)?; + } - Ok(edit) - }); - assert!(success); + Ok(edit) + }); + assert!(success); - dce(editor); - } - }; + dce(editor); + } + }; // Now that we've computed the used collections dataflow analysis, use the // results to materialize a clone whenever a node attempts to use an already -- GitLab From 54df5a02de5694aa00a14f460b1c74c9c95297ab Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 17:52:07 -0800 Subject: [PATCH 11/14] ugh --- hercules_ir/src/ir.rs | 13 +++++++++++ hercules_opt/src/gvn.rs | 12 ++++++++-- .../src/legalize_reference_semantics.rs | 4 ++++ hercules_opt/src/pass.rs | 8 +++++-- .../implicit_clone/src/implicit_clone.jn | 23 +++++++++++++++---- juno_samples/implicit_clone/src/main.rs | 4 ++-- juno_samples/matmul/src/matmul.jn | 5 ++-- 7 files changed, 56 insertions(+), 13 deletions(-) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 11d23e61..8bb8e4ef 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -1296,6 +1296,19 @@ impl Node { } } + pub fn try_write(&self) -> Option<(NodeID, NodeID, &[Index])> { + if let Node::Write { + collect, + data, + indices, + } = self + { + Some((*collect, *data, indices)) + } else { + None + } + } + pub fn is_zero_constant(&self, constants: &Vec<Constant>) -> bool { if let Node::Constant { id } = self && constants[id.idx()].is_zero() diff --git a/hercules_opt/src/gvn.rs b/hercules_opt/src/gvn.rs index a9db0cd8..5f7138e5 100644 --- a/hercules_opt/src/gvn.rs +++ b/hercules_opt/src/gvn.rs @@ -11,7 +11,7 @@ use crate::*; * fairly simple compared to in a normal CFG. Needs access to constants for * identity function simplification. */ -pub fn gvn(editor: &mut FunctionEditor) { +pub fn gvn(editor: &mut FunctionEditor, gvn_constants_and_clones: bool) { // Create worklist (starts as all nodes) and value number hashmap. let mut worklist: Vec<NodeID> = (0..editor.func().nodes.len()).map(NodeID::new).collect(); let mut value_numbers: HashMap<Node, NodeID> = HashMap::new(); @@ -28,7 +28,15 @@ pub fn gvn(editor: &mut FunctionEditor) { // Next, check if there is a value number for this simplified value yet. if let Some(number) = value_numbers.get(&editor.func().nodes[value.idx()]) { // If the number is this worklist item, there's nothing to be done. - if *number == work { + // Also, don't GVN constants and clones if indicated to not do so. + if *number == work + || (!gvn_constants_and_clones + && (editor.func().nodes[work.idx()].is_constant() + || editor.func().nodes[work.idx()] + .try_write() + .map(|(_, _, indices)| indices.is_empty()) + .unwrap_or(false))) + { continue; } diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index c4f58745..f60c8a7f 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -910,6 +910,10 @@ fn materialize_clones( }); assert!(success); + // De-duplicate phis. + gvn(editor, false); + + // Get rid of unused phis. dce(editor); } }; diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index d0449a4a..d2ef3f28 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -395,7 +395,7 @@ impl PassManager { &types_ref, &def_uses[idx], ); - gvn(&mut editor); + gvn(&mut editor, true); self.module.constants = constants_ref.take(); self.module.dynamic_constants = dynamic_constants_ref.take(); @@ -828,7 +828,11 @@ impl PassManager { &types_ref, &def_uses[idx], ); - infer_parallel_reduce(&mut editor, &fork_join_maps[idx], &reduce_cycles[idx]); + infer_parallel_reduce( + &mut editor, + &fork_join_maps[idx], + &reduce_cycles[idx], + ); infer_parallel_fork(&mut editor, &fork_join_maps[idx]); infer_vectorizable(&mut editor, &fork_join_maps[idx]); infer_tight_associative(&mut editor, &reduce_cycles[idx]); diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index 7017942c..a66cfc21 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -28,7 +28,7 @@ fn loop_implicit_clone(input : i32) -> i32 { } #[entry] -fn tricky_loop_implicit_clone(a : usize) -> i32 { +fn double_loop_implicit_clone(a : usize) -> usize { for i = 0 to a { let arr : i32[1]; for j = 0 to a { @@ -39,12 +39,12 @@ fn tricky_loop_implicit_clone(a : usize) -> i32 { } #[entry] -fn double_loop_implicit_clone(a : usize, b : usize) -> i32 { +fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 { let x = 0; for j = 0 to 2 { for i = 0 to 5 { let arr : i32[3]; - //let arr2 : i32[1]; + let arr2 : i32[1]; if a == b { arr[a] += 7; } else { @@ -52,11 +52,26 @@ fn double_loop_implicit_clone(a : usize, b : usize) -> i32 { } for k = 0 to (a + b - 1) { arr[a] += 2; - //arr2[0] += 1; + arr2[0] += 1; } x += arr[b]; } } + return x + 70; +} + +#[entry] +fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 { + let x = 0; + for i = 0 to 3 { + let arr : i32[1]; + if a == b { + arr[0] = 6; + } else { + arr[0] = 9; + } + x += arr[0]; + } return x; } diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index 508de775..764b1df4 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ b/juno_samples/implicit_clone/src/main.rs @@ -15,11 +15,11 @@ fn main() { println!("{}", output); assert_eq!(output, 7); - let output = tricky_loop_implicit_clone(3).await; + let output = double_loop_implicit_clone(3).await; println!("{}", output); assert_eq!(output, 42); - let output = double_loop_implicit_clone(2, 2).await; + let output = tricky_loop_implicit_clone(2, 2).await; println!("{}", output); assert_eq!(output, 130); diff --git a/juno_samples/matmul/src/matmul.jn b/juno_samples/matmul/src/matmul.jn index 16a262b9..b9ca2949 100644 --- a/juno_samples/matmul/src/matmul.jn +++ b/juno_samples/matmul/src/matmul.jn @@ -20,8 +20,8 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for bi = 0 to n / 64 { for bk = 0 to l / 64 { - let atile : i32[64, 64]; - let btile : i32[64, 64]; + let atile : i32[66, 64]; + let btile : i32[65, 64]; let ctile : i32[64, 64]; for tile_idx = 0 to m / 64 { @@ -33,7 +33,6 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] // should see a lack of a phi for ctile in the block loops and // induce a copy of an initial value of ctile (all zeros) on each // iteration of the block loops. - ctile[ti, tk] = 0; } } for ti = 0 to 64 { -- GitLab From d190f629b11bb41f23a710a4be82782bde313a0d Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 18:07:27 -0800 Subject: [PATCH 12/14] ugh fix --- .../src/legalize_reference_semantics.rs | 350 +++++++++--------- .../implicit_clone/src/implicit_clone.jn | 1 + 2 files changed, 175 insertions(+), 176 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index f60c8a7f..f76fe831 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -724,199 +724,197 @@ fn materialize_clones( // Helper to induce a clone when an implicit clone is identified. let nodes = nodes.clone(); - let mut induce_clone = - |object: NodeID, user: NodeID, value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { - // If `user` already used `object` and tries to use it again, then the - // clone is a "loop induced" clone. Otherwise, it's a simple clone. - if !value[&object].contains(&user) { - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); + let mut induce_clone = |object: NodeID, + user: NodeID, + value: &BTreeMap<NodeID, BTreeSet<NodeID>>| { + // If `user` already used `object` and tries to use it again, then the + // clone is a "loop induced" clone. Otherwise, it's a simple clone. + if !value[&object].contains(&user) { + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Create the clone into the new constant collection. - let clone_node = edit.add_node(Node::Write { - collect: cons_node, - data: object, - indices: vec![].into_boxed_slice(), - }); - - // Make user use the cloned object. - edit.replace_all_uses_where(object, clone_node, |id| *id == user) - }); - assert!(success); - } else { - // Figure out where to place that phi. This is the deepest - // loop header where `user` is responsible for making `object` used - // used at the top of the block, and the block dominates the block - // containing `user`. If `user` is a phi, then the region it's - // attached to is excluded from eligibility. - let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { - lattice[bb_to_prefix_sum[bb.idx()]] - .get(&object) - .unwrap_or(&BTreeSet::new()) - .contains(&user) - && dom.does_dom(*bb, bbs.0[user.idx()]) - && loops.contains(*bb) - && loops.is_in_loop(*bb, bbs.0[user.idx()]) - && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) + // Create the clone into the new constant collection. + let clone_node = edit.add_node(Node::Write { + collect: cons_node, + data: object, + indices: vec![].into_boxed_slice(), }); - let top_block = eligible_blocks - .max_by_key(|bb| loops.nesting(*bb).unwrap()) - .unwrap(); - assert!(editor.func().nodes[top_block.idx()].is_region()); - // Figure out the users of `object` that we need to phi back - // upwards. Assign each user a number indicating how far down the - // user chain it is, higher is farther down. This is used for - // picking the most downstream user later. - let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); - let mut workset: BTreeSet<NodeID> = BTreeSet::new(); - workset.insert(object); - let mut chain_ordering = 1; - while let Some(pop) = workset.pop_first() { - let iterated_users: BTreeSet<_> = super_value - .get(&pop) - .map(|users| users.into_iter()) - .into_iter() - .flatten() - .map(|id| *id) - .filter(|iterated_user| { - loops.is_in_loop(top_block, bbs.0[iterated_user.idx()]) - && dom.does_dom(bbs.0[pop.idx()], bbs.0[iterated_user.idx()]) - }) - .collect(); - workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); - for user in iterated_users { - *users.entry(user).or_default() = chain_ordering; - chain_ordering += 1; - } + // Make user use the cloned object. + edit.replace_all_uses_where(object, clone_node, |id| *id == user) + }); + assert!(success); + } else { + // Figure out where to place that phi. This is the deepest + // loop header where `user` is responsible for making `object` used + // used at the top of the block, and the block dominates the block + // containing `user`. If `user` is a phi, then the region it's + // attached to is excluded from eligibility. + let eligible_blocks = rev_po.iter().map(|bb| *bb).filter(|bb| { + lattice[bb_to_prefix_sum[bb.idx()]] + .get(&object) + .unwrap_or(&BTreeSet::new()) + .contains(&user) + && dom.does_dom(*bb, bbs.0[user.idx()]) + && loops.contains(*bb) + && loops.is_in_loop(*bb, bbs.0[user.idx()]) + && (!editor.func().nodes[user.idx()].is_phi() || *bb != bbs.0[user.idx()]) + }); + let top_block = eligible_blocks + .max_by_key(|bb| loops.nesting(*bb).unwrap()) + .unwrap(); + assert!(editor.func().nodes[top_block.idx()].is_region()); + + // Figure out the users of `object` that we need to phi back + // upwards. Assign each user a number indicating how far down the + // user chain it is, higher is farther down. This is used for + // picking the most downstream user later. + let mut users: BTreeMap<NodeID, usize> = BTreeMap::new(); + let mut workset: BTreeSet<NodeID> = BTreeSet::new(); + workset.insert(object); + let mut chain_ordering = 1; + while let Some(pop) = workset.pop_first() { + let iterated_users: BTreeSet<_> = super_value + .get(&pop) + .map(|users| users.into_iter()) + .into_iter() + .flatten() + .map(|id| *id) + .filter(|iterated_user| loops.is_in_loop(top_block, bbs.0[iterated_user.idx()])) + .collect(); + workset.extend(iterated_users.iter().filter(|id| !users.contains_key(id))); + for user in iterated_users { + *users.entry(user).or_default() = chain_ordering; + chain_ordering += 1; } + } - // The fringe users may not dominate any predecessors of the loop - // header. The following is some Juno code that exposes this: - // - // fn problematic(a : size) -> i32 { - // for i = 0 to a { - // let arr : i32[1]; - // for j = 0 to a { - // arr[0] = 1; - // } - // } - // return 0; - // } - // - // Note that `arr` induces a clone each iteration, since its value - // needs to be reset to all zeros. However, it should also be noted - // that the most fringe user of `arr`, the write inside the inner - // loop, does not dominate the bottom of the outer loop. Thus, we - // need to insert a phi in the bottom block of the outer loop to - // retrieve either the write, or `arr` before the inner loop. The - // general version of this problem requires the following solution. - // Our goal is to figure out which downstream user represents - // `object` at each block in the loop. We first assign each block - // containing a user the most downstream user it contains. Then, we - // create a dummy phi for every region (including the header) in the - // loop, which is the downstream user for that block. Then, every - // other block is assigned the downstream user of its single - // predecessor. This basically amounts to recreating SSA for - // `object` inside the loop. - let mut user_per_loop_bb = BTreeMap::new(); - let mut added_phis = BTreeMap::new(); - let mut top_phi = NodeID::new(0); - // Assign existing users. - for (user, ordering) in users.iter() { - let bb = bbs.0[user.idx()]; - if let Some(old_user) = user_per_loop_bb.get(&bb) - && users[old_user] > *ordering - { - } else { - user_per_loop_bb.insert(bb, *user); - } + // The fringe users may not dominate any predecessors of the loop + // header. The following is some Juno code that exposes this: + // + // fn problematic(a : size) -> i32 { + // for i = 0 to a { + // let arr : i32[1]; + // for j = 0 to a { + // arr[0] = 1; + // } + // } + // return 0; + // } + // + // Note that `arr` induces a clone each iteration, since its value + // needs to be reset to all zeros. However, it should also be noted + // that the most fringe user of `arr`, the write inside the inner + // loop, does not dominate the bottom of the outer loop. Thus, we + // need to insert a phi in the bottom block of the outer loop to + // retrieve either the write, or `arr` before the inner loop. The + // general version of this problem requires the following solution. + // Our goal is to figure out which downstream user represents + // `object` at each block in the loop. We first assign each block + // containing a user the most downstream user it contains. Then, we + // create a dummy phi for every region (including the header) in the + // loop, which is the downstream user for that block. Then, every + // other block is assigned the downstream user of its single + // predecessor. This basically amounts to recreating SSA for + // `object` inside the loop. + let mut user_per_loop_bb = BTreeMap::new(); + let mut added_phis = BTreeMap::new(); + let mut top_phi = NodeID::new(0); + // Assign existing users. + for (user, ordering) in users.iter() { + let bb = bbs.0[user.idx()]; + if let Some(old_user) = user_per_loop_bb.get(&bb) + && users[old_user] > *ordering + { + } else { + user_per_loop_bb.insert(bb, *user); } - // Assign dummy phis. - for bb in loops.nodes_in_loop(top_block) { - if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) - && editor.func().nodes[bb.idx()].is_region() - { - let success = editor.edit(|mut edit| { - let phi_node = edit.add_node(Node::Phi { - control: bb, - data: empty().collect(), - }); - if bb != top_block || !user_per_loop_bb.contains_key(&bb) { - user_per_loop_bb.insert(bb, phi_node); - } - if bb == top_block { - top_phi = phi_node; - } - added_phis.insert(phi_node, bb); - Ok(edit) + } + // Assign dummy phis. + for bb in loops.nodes_in_loop(top_block) { + if (!user_per_loop_bb.contains_key(&bb) || bb == top_block) + && editor.func().nodes[bb.idx()].is_region() + { + let success = editor.edit(|mut edit| { + let phi_node = edit.add_node(Node::Phi { + control: bb, + data: empty().collect(), }); - assert!(success); - } + if bb != top_block || !user_per_loop_bb.contains_key(&bb) { + user_per_loop_bb.insert(bb, phi_node); + } + if bb == top_block { + top_phi = phi_node; + } + added_phis.insert(phi_node, bb); + Ok(edit) + }); + assert!(success); } - // Assign users for the rest of the blocks. - for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { - if !user_per_loop_bb.contains_key(&bb) { - assert!(control_subgraph.preds(*bb).count() == 1); - user_per_loop_bb.insert( - *bb, - user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], - ); - } + } + // Assign users for the rest of the blocks. + for bb in rev_po.iter().filter(|bb| loops.is_in_loop(top_block, **bb)) { + if !user_per_loop_bb.contains_key(&bb) { + assert!(control_subgraph.preds(*bb).count() == 1); + user_per_loop_bb.insert( + *bb, + user_per_loop_bb[&control_subgraph.preds(*bb).next().unwrap()], + ); } + } - // Induce the clone. - let success = editor.edit(|mut edit| { - // Create the constant collection object for allocation. - let object_ty = typing[object.idx()]; - let object_cons = edit.add_zero_constant(object_ty); - let cons_node = edit.add_node(Node::Constant { id: object_cons }); + // Induce the clone. + let success = editor.edit(|mut edit| { + // Create the constant collection object for allocation. + let object_ty = typing[object.idx()]; + let object_cons = edit.add_zero_constant(object_ty); + let cons_node = edit.add_node(Node::Constant { id: object_cons }); - // Create the phis. - let mut real_phis = BTreeMap::new(); - for (dummy, bb) in added_phis { - let real = edit.add_node(Node::Phi { - control: bb, - data: control_subgraph - .preds(bb) - .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) - .collect(), - }); - real_phis.insert(dummy, real); - } - - // Create the clone into the phi. - let clone_node = edit.add_node(Node::Write { - collect: real_phis[&top_phi], - data: object, - indices: vec![].into_boxed_slice(), + // Create the phis. + let mut real_phis = BTreeMap::new(); + for (dummy, bb) in added_phis { + let real = edit.add_node(Node::Phi { + control: bb, + data: control_subgraph + .preds(bb) + .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) + .collect(), }); + real_phis.insert(dummy, real); + } - // Make user use the cloned object. - edit = edit.replace_all_uses_where(object, clone_node, |id| { - id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) - })?; + // Create the clone into the phi. + let clone_node = edit.add_node(Node::Write { + collect: real_phis[&top_phi], + data: object, + indices: vec![].into_boxed_slice(), + }); - // Get rid of the dummy phis. - for (dummy, real) in real_phis { - edit = edit.replace_all_uses(dummy, real)?; - edit = edit.delete_node(dummy)?; - } + // Make users use the cloned object. + edit = edit.replace_all_uses_where(object, clone_node, |id| { + id.idx() < bbs.0.len() && loops.is_in_loop(top_block, bbs.0[id.idx()]) + })?; - Ok(edit) - }); - assert!(success); + // Get rid of the dummy phis. + for (dummy, real) in real_phis { + edit = edit.replace_all_uses(dummy, real)?; + edit = edit.delete_node(dummy)?; + } - // De-duplicate phis. - gvn(editor, false); + Ok(edit) + }); + assert!(success); - // Get rid of unused phis. - dce(editor); - } - }; + // De-duplicate phis. + gvn(editor, false); + + // Get rid of unused phis. + dce(editor); + } + }; // Now that we've computed the used collections dataflow analysis, use the // results to materialize a clone whenever a node attempts to use an already diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index a66cfc21..64fa34da 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -70,6 +70,7 @@ fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 { } else { arr[0] = 9; } + arr[0] = 7; x += arr[0]; } return x; -- GitLab From 6864929c97765625edbb362605c3870cde0ab29d Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 18:42:26 -0800 Subject: [PATCH 13/14] finally --- .../src/legalize_reference_semantics.rs | 18 ++++++++++++++---- .../implicit_clone/src/implicit_clone.jn | 6 ++++-- juno_samples/implicit_clone/src/main.rs | 4 ++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/hercules_opt/src/legalize_reference_semantics.rs b/hercules_opt/src/legalize_reference_semantics.rs index f76fe831..3c89c7da 100644 --- a/hercules_opt/src/legalize_reference_semantics.rs +++ b/hercules_opt/src/legalize_reference_semantics.rs @@ -874,7 +874,8 @@ fn materialize_clones( let cons_node = edit.add_node(Node::Constant { id: object_cons }); // Create the phis. - let mut real_phis = BTreeMap::new(); + let mut phi_map = BTreeMap::new(); + let mut real_phis = BTreeSet::new(); for (dummy, bb) in added_phis { let real = edit.add_node(Node::Phi { control: bb, @@ -883,12 +884,14 @@ fn materialize_clones( .map(|pred| *user_per_loop_bb.get(&pred).unwrap_or(&cons_node)) .collect(), }); - real_phis.insert(dummy, real); + phi_map.insert(dummy, real); + real_phis.insert(real); } // Create the clone into the phi. + let real_top_phi = phi_map[&top_phi]; let clone_node = edit.add_node(Node::Write { - collect: real_phis[&top_phi], + collect: real_top_phi, data: object, indices: vec![].into_boxed_slice(), }); @@ -899,11 +902,15 @@ fn materialize_clones( })?; // Get rid of the dummy phis. - for (dummy, real) in real_phis { + for (dummy, real) in phi_map { edit = edit.replace_all_uses(dummy, real)?; edit = edit.delete_node(dummy)?; } + // Make phis use the clone instead of the top phi. + edit = + edit.replace_all_uses_where(real_top_phi, clone_node, |id| *id != clone_node)?; + Ok(edit) }); assert!(success); @@ -913,6 +920,9 @@ fn materialize_clones( // Get rid of unused phis. dce(editor); + + // Simplify phis. + phi_elim(editor); } }; diff --git a/juno_samples/implicit_clone/src/implicit_clone.jn b/juno_samples/implicit_clone/src/implicit_clone.jn index 64fa34da..d06a6498 100644 --- a/juno_samples/implicit_clone/src/implicit_clone.jn +++ b/juno_samples/implicit_clone/src/implicit_clone.jn @@ -57,7 +57,7 @@ fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 { x += arr[b]; } } - return x + 70; + return x; } #[entry] @@ -70,7 +70,9 @@ fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 { } else { arr[0] = 9; } - arr[0] = 7; + for j = 0 to 4 { + arr[0] += 1; + } x += arr[0]; } return x; diff --git a/juno_samples/implicit_clone/src/main.rs b/juno_samples/implicit_clone/src/main.rs index 764b1df4..c0adaaef 100644 --- a/juno_samples/implicit_clone/src/main.rs +++ b/juno_samples/implicit_clone/src/main.rs @@ -23,6 +23,10 @@ fn main() { println!("{}", output); assert_eq!(output, 130); + let output = tricky2_loop_implicit_clone(2, 3).await; + println!("{}", output); + assert_eq!(output, 39); + let output = no_implicit_clone(4).await; println!("{}", output); assert_eq!(output, 13); -- GitLab From e632507e46eea340313a216c566b10a59ed547a2 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 7 Jan 2025 18:49:30 -0800 Subject: [PATCH 14/14] Hack because I want to sleep --- hercules_opt/src/pass.rs | 2 +- juno_samples/matmul/src/matmul.jn | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index d2ef3f28..baeaae87 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -395,7 +395,7 @@ impl PassManager { &types_ref, &def_uses[idx], ); - gvn(&mut editor, true); + gvn(&mut editor, false); self.module.constants = constants_ref.take(); self.module.dynamic_constants = dynamic_constants_ref.take(); diff --git a/juno_samples/matmul/src/matmul.jn b/juno_samples/matmul/src/matmul.jn index b9ca2949..f59cc215 100644 --- a/juno_samples/matmul/src/matmul.jn +++ b/juno_samples/matmul/src/matmul.jn @@ -20,8 +20,8 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for bi = 0 to n / 64 { for bk = 0 to l / 64 { - let atile : i32[66, 64]; - let btile : i32[65, 64]; + let atile : i32[64, 64]; + let btile : i32[64, 64]; let ctile : i32[64, 64]; for tile_idx = 0 to m / 64 { @@ -29,10 +29,6 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l] for tk = 0 to 64 { atile[ti, tk] = a[bi * 64 + ti, tile_idx * 64 + tk]; btile[ti, tk] = b[tile_idx * 64 + ti, bk * 64 + tk]; - // TODO: remove setting ctile to zero explicitly, clone analysis - // should see a lack of a phi for ctile in the block loops and - // induce a copy of an initial value of ctile (all zeros) on each - // iteration of the block loops. } } for ti = 0 to 64 { -- GitLab