diff --git a/hercules_opt/src/forkify.rs b/hercules_opt/src/forkify.rs index ec4e9fbcc22d9f1c8a53652173706b40c5b12e65..082f1ae958c7c9f90fd4719a17535fc7e604dfdd 100644 --- a/hercules_opt/src/forkify.rs +++ b/hercules_opt/src/forkify.rs @@ -10,8 +10,8 @@ use hercules_ir::*; use crate::*; -/* - * TODO: Forkify currently makes a bunch of small edits - this needs to be +/* + * TODO: Forkify currently makes a bunch of small edits - this needs to be * changed so that every loop that gets forkified corresponds to a single edit * + sub-edits. This would allow us to run forkify on a subset of a function. */ @@ -349,6 +349,27 @@ pub fn forkify_loop( }; let reduce_id = edit.add_node(reduce); + if (!edit.get_node(init).is_reduce() + && edit.get_schedule(init).contains(&Schedule::ParallelReduce)) + || (!edit.get_node(continue_latch).is_reduce() + && edit + .get_schedule(continue_latch) + .contains(&Schedule::ParallelReduce)) + { + edit = edit.add_schedule(reduce_id, Schedule::ParallelReduce)?; + } + if (!edit.get_node(init).is_reduce() + && edit + .get_schedule(init) + .contains(&Schedule::TightAssociative)) + || (!edit.get_node(continue_latch).is_reduce() + && edit + .get_schedule(continue_latch) + .contains(&Schedule::TightAssociative)) + { + edit = edit.add_schedule(reduce_id, Schedule::TightAssociative)?; + } + edit = edit.replace_all_uses_where(phi, reduce_id, |usee| *usee != reduce_id)?; edit = edit.replace_all_uses_where(continue_latch, reduce_id, |usee| { !loop_nodes.contains(usee) && *usee != reduce_id @@ -414,9 +435,8 @@ pub fn analyze_phis<'a>( phis: &'a [NodeID], loop_nodes: &'a HashSet<NodeID>, ) -> impl Iterator<Item = LoopPHI> + 'a { - - // Find data cycles within the loop of this phi, - // Start from the phis loop_continue_latch, and walk its uses until we find the original phi. + // Find data cycles within the loop of this phi, + // Start from the phis loop_continue_latch, and walk its uses until we find the original phi. phis.into_iter().map(move |phi| { let stop_on: HashSet<NodeID> = editor @@ -465,7 +485,7 @@ pub fn analyze_phis<'a>( .unwrap(); let loop_continue_latch = editor.node(phi).try_phi().unwrap().1[continue_idx]; - + let uses = walk_all_uses_stop_on(loop_continue_latch, editor, stop_on.clone()); let users = walk_all_users_stop_on(*phi, editor, stop_on.clone()); @@ -513,7 +533,7 @@ pub fn analyze_phis<'a>( // If some other node in the cycle is used, there is not a valid node to assign it after making the cycle a reduce. if intersection .iter() - .filter(|node| **node != loop_continue_latch ) + .filter(|node| **node != loop_continue_latch) .any(|data_node| { editor .get_users(*data_node) diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn index 55e0a37e2194ff172add63c1b436dccd1cfd833d..bcfa1d25d0281b5c842d62142976c7518fe194e6 100644 --- a/juno_samples/fork_join_tests/src/fork_join_tests.jn +++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn @@ -59,3 +59,16 @@ fn test4(input : i32) -> i32[4, 4] { } return arr; } + +#[entry] +fn test5(input : i32) -> i32[4] { + let arr1 : i32[4]; + for i = 0 to 4 { + let red = arr1[i]; + for k = 0 to 3 { + red += k as i32; + } + @reduce arr1[i] = red + input; + } + return arr1; +} diff --git a/juno_samples/fork_join_tests/src/gpu.sch b/juno_samples/fork_join_tests/src/gpu.sch index 701c347cfc57b6ae55e031e3143ec7858ff3e1a4..6e2d6845423d13e0c2208b0169e3437a4735ba8f 100644 --- a/juno_samples/fork_join_tests/src/gpu.sch +++ b/juno_samples/fork_join_tests/src/gpu.sch @@ -1,3 +1,5 @@ +parallel-reduce(test5@reduce); + gvn(*); phi-elim(*); dce(*); @@ -7,6 +9,7 @@ gpu(out.test1); gpu(out.test2); gpu(out.test3); gpu(out.test4); +gpu(out.test5); ip-sroa(*); sroa(*); diff --git a/juno_samples/fork_join_tests/src/main.rs b/juno_samples/fork_join_tests/src/main.rs index cbd42c50ac95d102f248477a3bee0cd517eb9d5b..4b6aba655a45a2fe2f844efe75f4e8a738a17b96 100644 --- a/juno_samples/fork_join_tests/src/main.rs +++ b/juno_samples/fork_join_tests/src/main.rs @@ -37,6 +37,11 @@ fn main() { let output = r.run(9).await; let correct = vec![63i32; 16]; assert(correct, output); + + let mut r = runner!(test5); + let output = r.run(4).await; + let correct = vec![7i32; 4]; + assert(correct, output); }); } diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs index 11a8ec53b1fd52ec37fb260d3219c849760aee49..80cf2cb42d30edfd43565f397d4973ea43b3c515 100644 --- a/juno_scheduler/src/compile.rs +++ b/juno_scheduler/src/compile.rs @@ -130,6 +130,7 @@ impl FromStr for Appliable { "parallel-fork" => Ok(Appliable::Schedule(Schedule::ParallelFork)), "parallel-reduce" => Ok(Appliable::Schedule(Schedule::ParallelReduce)), "vectorize" => Ok(Appliable::Schedule(Schedule::Vectorizable)), + "no-memset" | "no-reset" => Ok(Appliable::Schedule(Schedule::NoResetConstant)), _ => Err(s.to_string()), } diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 9c818707ef2b3ac8bfed3eaf786bdcdca6c32281..20a3dba869a5de6ea0fae54e0de3430c2a14980a 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -780,8 +780,15 @@ impl PassManager { .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() - .expect("Error running clang. Is it installed?"); - assert!(clang_process.wait().unwrap().success()); + .expect("PANIC: Error running clang. Is it installed?"); + if clang_process + .wait() + .map(|status| !status.success()) + .unwrap_or(false) + { + let path = tmp_dir.into_path(); + panic!("PANIC: Clang failed to compile the LLVM IR module. Persisting temporary directory ({}).", path.display()); + } let mut ar_args = vec!["crus", &output_archive, &llvm_object]; @@ -806,8 +813,15 @@ impl PassManager { .arg(&cuda_object) .arg(&cuda_path) .spawn() - .expect("Error running nvcc. Is it installed?"); - assert!(nvcc_process.wait().unwrap().success()); + .expect("PANIC: Error running NVCC. Is it installed?"); + if nvcc_process + .wait() + .map(|status| !status.success()) + .unwrap_or(false) + { + let path = tmp_dir.into_path(); + panic!("PANIC: NVCC failed to compile the CUDA module. Persisting temporary directory ({}).", path.display()); + } ar_args.push(&cuda_object); } @@ -816,7 +830,17 @@ impl PassManager { .args(&ar_args) .spawn() .expect("Error running ar. Is it installed?"); - assert!(ar_process.wait().unwrap().success()); + if ar_process + .wait() + .map(|status| !status.success()) + .unwrap_or(false) + { + let path = tmp_dir.into_path(); + panic!( + "PANIC: Ar failed to create a static library. Persisting temporary directory ({}).", + path.display() + ); + } // Write the Rust runtime into a file. let output_rt = format!("{}/rt_{}.hrt", output_dir, module_name);