Skip to content
Snippets Groups Projects
Commit 350cee4d authored by rarbore2's avatar rarbore2
Browse files

Merge branch 'clone_detection6' into 'main'

Proper loop induced clone detection

See merge request !95
parents 949f3a8a 525ac54f
No related branches found
No related tags found
1 merge request: !95 Proper loop induced clone detection
Pipeline #200897 passed
......@@ -77,6 +77,26 @@ impl DomTree {
.1
}
/*
 * Among the nodes yielded by the iterator, return the one whose level in the
 * dom tree is smallest. The root is treated as level 0; for any other node the
 * level is the first field of its idom entry. When several nodes share the
 * smallest level, the first one yielded wins. Panics on an empty iterator.
 */
pub fn shallowest_amongst<I>(&self, x: I) -> NodeID
where
    I: Iterator<Item = NodeID>,
{
    // The root has no idom entry to consult, so its level is hard-coded.
    let level_of = |node: &NodeID| {
        if *node == self.root {
            0
        } else {
            self.idom[node].0
        }
    };
    let (_, shallowest) = x
        .map(|node| (level_of(&node), node))
        .min_by_key(|&(level, _)| level)
        .unwrap();
    shallowest
}
/*
* Find the least common ancestor in the tree of two nodes. This is an
* ancestor of the two nodes that is as far down the tree as possible.
......
......@@ -1296,6 +1296,19 @@ impl Node {
}
}
/*
 * If this node is a write, return its collect, data, and indices. Any other
 * kind of node yields None.
 */
pub fn try_write(&self) -> Option<(NodeID, NodeID, &[Index])> {
    let Node::Write {
        collect,
        data,
        indices,
    } = self
    else {
        return None;
    };
    Some((*collect, *data, indices))
}
pub fn is_zero_constant(&self, constants: &Vec<Constant>) -> bool {
if let Node::Constant { id } = self
&& constants[id.idx()].is_zero()
......
......@@ -6,8 +6,7 @@ use self::hercules_ir::ir::*;
use crate::*;
/*
* Top level function to run dead code elimination. Deletes nodes by setting
* nodes to gravestones. Works with a function already containing gravestones.
* Top level function to run dead code elimination.
*/
pub fn dce(editor: &mut FunctionEditor) {
// Create worklist (starts as all nodes).
......
......@@ -11,7 +11,7 @@ use crate::*;
* fairly simple compared to in a normal CFG. Needs access to constants for
* identity function simplification.
*/
pub fn gvn(editor: &mut FunctionEditor) {
pub fn gvn(editor: &mut FunctionEditor, gvn_constants_and_clones: bool) {
// Create worklist (starts as all nodes) and value number hashmap.
let mut worklist: Vec<NodeID> = (0..editor.func().nodes.len()).map(NodeID::new).collect();
let mut value_numbers: HashMap<Node, NodeID> = HashMap::new();
......@@ -28,7 +28,15 @@ pub fn gvn(editor: &mut FunctionEditor) {
// Next, check if there is a value number for this simplified value yet.
if let Some(number) = value_numbers.get(&editor.func().nodes[value.idx()]) {
// If the number is this worklist item, there's nothing to be done.
if *number == work {
// Also, don't GVN constants and clones if indicated to not do so.
if *number == work
|| (!gvn_constants_and_clones
&& (editor.func().nodes[work.idx()].is_constant()
|| editor.func().nodes[work.idx()]
.try_write()
.map(|(_, _, indices)| indices.is_empty())
.unwrap_or(false)))
{
continue;
}
......
This diff is collapsed.
......@@ -395,7 +395,7 @@ impl PassManager {
&types_ref,
&def_uses[idx],
);
gvn(&mut editor);
gvn(&mut editor, false);
self.module.constants = constants_ref.take();
self.module.dynamic_constants = dynamic_constants_ref.take();
......@@ -828,7 +828,11 @@ impl PassManager {
&types_ref,
&def_uses[idx],
);
infer_parallel_reduce(&mut editor, &fork_join_maps[idx], &reduce_cycles[idx]);
infer_parallel_reduce(
&mut editor,
&fork_join_maps[idx],
&reduce_cycles[idx],
);
infer_parallel_fork(&mut editor, &fork_join_maps[idx]);
infer_vectorizable(&mut editor, &fork_join_maps[idx]);
infer_tight_associative(&mut editor, &reduce_cycles[idx]);
......
......@@ -201,6 +201,8 @@ pub fn compile_ir(
pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
}
add_pass!(pm, verify, LegalizeReferenceSemantics);
add_verified_pass!(pm, verify, DCE);
add_pass!(pm, verify, LegalizeReferenceSemantics);
pm.add_pass(hercules_opt::pass::Pass::Codegen(output_dir, module_name));
pm.run_passes();
......
......@@ -27,6 +27,57 @@ fn loop_implicit_clone(input : i32) -> i32 {
return r + 7;
}
// Test case: an array declared in the body of an outer loop and written in an
// inner loop. The array is never read; the function returns 42 whatever the
// value of a.
#[entry]
fn double_loop_implicit_clone(a : usize) -> usize {
for i = 0 to a {
// arr is declared inside the body of the i loop and only written in the j loop.
let arr : i32[1];
for j = 0 to a {
arr[0] = 1;
}
}
return 42;
}
// Test case: two arrays declared inside a doubly nested loop, updated in both
// branches of a conditional and again in an innermost loop. Only arr[b] feeds
// the returned accumulator x; arr2 never contributes to the result.
// NOTE(review): assuming each declaration of arr yields a fresh zero-filled
// array and `to` bounds are exclusive, a == b == 2 gives 7 + 2 * 3 = 13 per
// (j, i) iteration, i.e. 130 over the 2 * 5 iterations - confirm against the
// test driver.
#[entry]
fn tricky_loop_implicit_clone(a : usize, b : usize) -> i32 {
let x = 0;
for j = 0 to 2 {
for i = 0 to 5 {
// Both arrays are declared inside the innermost enclosing loop body.
let arr : i32[3];
let arr2 : i32[1];
if a == b {
arr[a] += 7;
} else {
arr[a] += 1;
}
for k = 0 to (a + b - 1) {
arr[a] += 2;
arr2[0] += 1;
}
// Accumulate the element at index b (the same element as arr[a] only when a == b).
x += arr[b];
}
}
return x;
}
// Test case: an array declared inside a loop, assigned in both branches of a
// conditional, then incremented in an inner loop before being read.
// arr[0] is overwritten (6 or 9) on every outer iteration, so the result does
// not depend on the array's initial contents.
// NOTE(review): assuming `to` bounds are exclusive, each outer iteration adds
// 6 + 4 = 10 (a == b) or 9 + 4 = 13 (a != b) to x, for a total of 30 or 39.
#[entry]
fn tricky2_loop_implicit_clone(a : usize, b : usize) -> i32 {
let x = 0;
for i = 0 to 3 {
let arr : i32[1];
if a == b {
arr[0] = 6;
} else {
arr[0] = 9;
}
for j = 0 to 4 {
arr[0] += 1;
}
x += arr[0];
}
return x;
}
#[entry]
fn no_implicit_clone(input : i32) -> i32 {
let arr : i32[2];
......
......@@ -15,6 +15,18 @@ fn main() {
println!("{}", output);
assert_eq!(output, 7);
let output = double_loop_implicit_clone(3).await;
println!("{}", output);
assert_eq!(output, 42);
let output = tricky_loop_implicit_clone(2, 2).await;
println!("{}", output);
assert_eq!(output, 130);
let output = tricky2_loop_implicit_clone(2, 3).await;
println!("{}", output);
assert_eq!(output, 39);
let output = no_implicit_clone(4).await;
println!("{}", output);
assert_eq!(output, 13);
......
......@@ -20,10 +20,8 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l]
for bi = 0 to n / 64 {
for bk = 0 to l / 64 {
// TODO: make these all the same size, clone analysis should undo GVN's
// combining of these three arrays.
let atile : i32[66, 64];
let btile : i32[65, 64];
let atile : i32[64, 64];
let btile : i32[64, 64];
let ctile : i32[64, 64];
for tile_idx = 0 to m / 64 {
......@@ -31,11 +29,6 @@ fn tiled_64_matmul<n : usize, m : usize, l : usize>(a : i32[n, m], b : i32[m, l]
for tk = 0 to 64 {
atile[ti, tk] = a[bi * 64 + ti, tile_idx * 64 + tk];
btile[ti, tk] = b[tile_idx * 64 + ti, bk * 64 + tk];
// TODO: remove setting ctile to zero explicitly, clone analysis
// should see a lack of a phi for ctile in the block loops and
// induce a copy of an initial value of ctile (all zeros) on each
// iteration of the block loops.
ctile[ti, tk] = 0;
}
}
for ti = 0 to 64 {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment