diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index e635b3c00d7bfa0090376d8056e65d8d01e60ce2..ae3dfe22bd73dd1df395f25c45db6b3f27c386a5 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -695,7 +695,8 @@ pub fn fork_coalesce_helper( editor.edit(|mut edit| { let new_tid = edit.add_node(new_tid); - let edit = edit.replace_all_uses(tid, new_tid)?; + let mut edit = edit.replace_all_uses(tid, new_tid)?; + edit.sub_edit(tid, new_tid); Ok(edit) }); } @@ -1598,3 +1599,8 @@ pub fn clean_monoid_reduces(editor: &mut FunctionEditor, typing: &Vec<TypeID>) { } } } + +/* + * Looks for reads in fork-joins that are linear in the thread IDs for the fork- + * join. + */ diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn index 3b7c783308cd1949651afb1a7f0cdd708f3f71d7..334fc2bfe4f745cec9004fdf9ebdf80d11818c0f 100644 --- a/juno_samples/fork_join_tests/src/fork_join_tests.jn +++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn @@ -124,7 +124,8 @@ fn test8(input : i32) -> i32[8] { } #[entry] -fn test9<r, c, z : usize>(input : i32[r, c]) -> i32[r, c] { +fn test9<r, c : usize>(input : i32[r, c]) -> i32[r, c] { + const z = 3; const rad = z / 2; @const let out : i32[r, c]; diff --git a/juno_samples/fork_join_tests/src/gpu.sch b/juno_samples/fork_join_tests/src/gpu.sch index c554fd502292c040a405e0ffafb548844d0f1d5f..ca17f69223ae5c39e375f9f47b9badbf195cf0aa 100644 --- a/juno_samples/fork_join_tests/src/gpu.sch +++ b/juno_samples/fork_join_tests/src/gpu.sch @@ -73,6 +73,24 @@ dce(auto.test8); simplify-cfg(auto.test8); dce(auto.test8); +fork-split(auto.test9@filter_loop); +fork-unroll(auto.test9); +fork-unroll(auto.test9); +dce(auto.test9); +ccp(auto.test9); +gvn(auto.test9); +phi-elim(auto.test9); +dce(auto.test9); +fixpoint { + predication(auto.test9); + simplify-cfg(auto.test9); +} +ccp(auto.test9); +gvn(auto.test9); +phi-elim(auto.test9); +lift-dc-math(auto.test9); +dce(auto.test9); + ip-sroa(*); sroa(*); dce(*); @@ -80,7 +98,7 @@ ccp(*); gvn(*); phi-elim(*); dce(*); -gcm(*); +gcm(*); float-collections(test2, auto.test2, test4, auto.test4, test5, auto.test5); gcm(*); diff --git a/juno_samples/fork_join_tests/src/main.rs b/juno_samples/fork_join_tests/src/main.rs index fa99f759ca08814215d1b138758f833f83c38c5f..e66309b22b0650feaab315829dd412f6275e9a99 100644 --- a/juno_samples/fork_join_tests/src/main.rs +++ b/juno_samples/fork_join_tests/src/main.rs @@ -61,7 +61,7 @@ fn main() { let mut r = runner!(test9); let input = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; let input = HerculesImmBox::from(&input as &[i32]); - let output = r.run(3, 3, 3, input.to()).await; + let output = r.run(3, 3, input.to()).await; let correct = vec![ 1 + 2 + 4 + 5, 1 + 2 + 3 + 4 + 5 + 6,