diff --git a/Cargo.lock b/Cargo.lock index e761361bee6d76f6b08d0680d4be986176121ad2..06ee00ff1bfcff9be1930e8a472de50fe72d4995 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1118,6 +1118,16 @@ dependencies = [ "with_builtin_macros", ] +[[package]] +name = "juno_control" +version = "0.1.0" +dependencies = [ + "async-std", + "hercules_rt", + "juno_build", + "with_builtin_macros", +] + [[package]] name = "juno_edge_detection" version = "0.1.0" @@ -1179,16 +1189,6 @@ dependencies = [ "with_builtin_macros", ] -[[package]] -name = "juno_nested_ccp" -version = "0.1.0" -dependencies = [ - "async-std", - "hercules_rt", - "juno_build", - "with_builtin_macros", -] - [[package]] name = "juno_patterns" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 890d79247dc679deb6994dc768ca511f886f515d..54cfc5123c4c04a92d856ab4f5b0fb44a761a73c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ members = [ "juno_samples/patterns", "juno_samples/matmul", "juno_samples/casts_and_intrinsics", - "juno_samples/nested_ccp", + "juno_samples/control", "juno_samples/antideps", "juno_samples/implicit_clone", "juno_samples/cava", diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index f6a1f309745adf23b44d093ed8b27e48cd92737f..4f8f91ef177ba947d35d3ef3885d8273d059aa35 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -719,7 +719,7 @@ impl<'a> CPUContext<'a> { ); write!( body, - " {} = call i64 @llvm.umax.i64(i64{},i64%dc{}))\n", + " {} = call i64 @llvm.umax.i64(i64{},i64%dc{})\n", new_val, cur_value, x.idx() diff --git a/hercules_opt/src/ccp.rs b/hercules_opt/src/ccp.rs index 9768198cc1b8a4c498e2cb2c5daa7a4a7ee73c9d..1969430a95ea020e9d1fb87d4165ea219d3f96a9 100644 --- a/hercules_opt/src/ccp.rs +++ b/hercules_opt/src/ccp.rs @@ -333,71 +333,6 @@ pub fn ccp(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>) { } Ok(edit) }); - - // Step 4: collapse region chains. - collapse_region_chains(editor); -} - -/* - * Top level function to collapse region chains. A chain is a list of at least - * one region node that takes only one control input. Region chains can be - * deleted. The use of the head of the chain can turn into the use by the user - * of the tail of the chain. - */ -pub fn collapse_region_chains(editor: &mut FunctionEditor) { - let num_nodes = editor.func().nodes.len(); - // Loop over all region nodes. It's fine to modify the function as we loop - // over it. - for id in (0..num_nodes).map(NodeID::new) { - if let Node::Region { preds } = &editor.func().nodes[id.idx()] { - let has_call_user = editor - .get_users(id) - .any(|x| editor.func().nodes[x.idx()].is_call()); - - if preds.len() == 1 && !has_call_user { - // Step 1: bridge gap between use and user. - let predecessor = preds[0]; - let successor = editor - .get_users(id) - .filter(|x| !editor.func().nodes[x.idx()].is_phi()) - .next() - .expect("Region node doesn't have a non-phi user."); - - editor.edit(|edit| { - // Set successor's use of this region to use the region's use. - edit.replace_all_uses_where(id, predecessor, |n| *n == successor) - }); - - // Step 2: bridge gap between uses and users of corresponding - // phi nodes. - let phis: Vec<NodeID> = editor - .get_users(id) - .filter(|x| editor.func().nodes[x.idx()].is_phi()) - .collect(); - for phi_id in phis { - let data_uses = - if let Node::Phi { control, data } = &editor.func().nodes[phi_id.idx()] { - assert!(*control == id); - data - } else { - panic!() - }; - assert!(data_uses.len() == 1, "Phi node doesn't have exactly one data use, while corresponding region had exactly one control use."); - let predecessor = data_uses[0]; - - editor.edit(|mut edit| { - // Set successors' use of this phi to use the phi's use. - edit = edit.replace_all_uses(phi_id, predecessor)?; - // Delete this phi. - edit.delete_node(phi_id) - }); - } - - // Delete this region. - editor.edit(|edit| edit.delete_node(id)); - } - } - } } fn ccp_flow_function( @@ -412,7 +347,7 @@ fn ccp_flow_function( CCPLattice::meet(&val, &inputs[id.idx()]) }), // If node has only one output, if doesn't directly handle crossover of - // reachability and constant propagation. Read handles that. + // reachability and constant propagation. Projection handles that. Node::If { control, cond: _ } => inputs[control.idx()].clone(), Node::Match { control, sum: _ } => inputs[control.idx()].clone(), Node::Fork { @@ -739,10 +674,7 @@ fn ccp_flow_function( { ConstantLattice::top() } else { - ConstantLattice::meet( - first_constant, - &ConstantLattice::meet(second_constant, third_constant), - ) + ConstantLattice::meet(second_constant, third_constant) }; CCPLattice { diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs index 48475f2f334687d60960beac905952025de9f86f..7187508a31240071849a26b85100e89607786b2f 100644 --- a/hercules_opt/src/lib.rs +++ b/hercules_opt/src/lib.rs @@ -19,6 +19,7 @@ pub mod outline; pub mod phi_elim; pub mod pred; pub mod schedule; +pub mod simplify_cfg; pub mod slf; pub mod sroa; pub mod unforkify; @@ -43,6 +44,7 @@ pub use crate::outline::*; pub use crate::phi_elim::*; pub use crate::pred::*; pub use crate::schedule::*; +pub use crate::simplify_cfg::*; pub use crate::slf::*; pub use crate::sroa::*; pub use crate::unforkify::*; diff --git a/hercules_opt/src/simplify_cfg.rs b/hercules_opt/src/simplify_cfg.rs new file mode 100644 index 0000000000000000000000000000000000000000..2e19e6c0b148607e2a4460b0d3cfa9b5311349b1 --- /dev/null +++ b/hercules_opt/src/simplify_cfg.rs @@ -0,0 +1,99 @@ +use std::collections::HashMap; + +use hercules_ir::*; + +use crate::*; + +/* + * Top level function to simplify control flow in a Hercules function. + */ +pub fn simplify_cfg(editor: &mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>) { + // Collapse region chains. + collapse_region_chains(editor); + + // Get rid of unnecessary fork-joins. + remove_useless_fork_joins(editor, fork_join_map); +} + +/* + * Function to collapse region chains. A chain is a list of at least one region + * node that takes only one control input. Region chains can be deleted. The use + * of the head of the chain can turn into the use by the user of the tail of the + * chain. + */ +fn collapse_region_chains(editor: &mut FunctionEditor) { + // Loop over all region nodes. + for id in editor.node_ids() { + if let Node::Region { preds } = &editor.func().nodes[id.idx()] { + let has_call_user = editor + .get_users(id) + .any(|x| editor.func().nodes[x.idx()].is_call()); + + if preds.len() == 1 && !has_call_user { + // Step 1: bridge gap between use and user. + let predecessor = preds[0]; + let successor = editor + .get_users(id) + .filter(|x| !editor.func().nodes[x.idx()].is_phi()) + .next() + .expect("Region node doesn't have a non-phi user."); + + editor.edit(|edit| { + // Set successor's use of this region to use the region's use. + edit.replace_all_uses_where(id, predecessor, |n| *n == successor) + }); + + // Step 2: bridge gap between uses and users of corresponding + // phi nodes. + let phis: Vec<NodeID> = editor + .get_users(id) + .filter(|x| editor.func().nodes[x.idx()].is_phi()) + .collect(); + for phi_id in phis { + let data_uses = + if let Node::Phi { control, data } = &editor.func().nodes[phi_id.idx()] { + assert!(*control == id); + data + } else { + panic!() + }; + assert!(data_uses.len() == 1, "Phi node doesn't have exactly one data use, while corresponding region had exactly one control use."); + let predecessor = data_uses[0]; + + editor.edit(|mut edit| { + // Set successors' use of this phi to use the phi's use. + edit = edit.replace_all_uses(phi_id, predecessor)?; + // Delete this phi. + edit.delete_node(phi_id) + }); + } + + // Delete this region. + editor.edit(|edit| edit.delete_node(id)); + } + } + } +} + +/* + * Function to remove unused fork-joins. A fork-join is unused if there are no + * reduce users of the join node. In such situations, it is asserted there are + * no thread ID users of the fork as well. + */ +fn remove_useless_fork_joins(editor: &mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>) { + for (fork, join) in fork_join_map { + if editor.get_users(*join).len() == 1 { + assert_eq!(editor.get_users(*fork).len(), 1); + + let fork_use = get_uses(&editor.func().nodes[fork.idx()]).as_ref()[0]; + let join_use = get_uses(&editor.func().nodes[join.idx()]).as_ref()[0]; + + editor.edit(|mut edit| { + edit = edit.replace_all_uses(*join, join_use)?; + edit = edit.replace_all_uses(*fork, fork_use)?; + edit = edit.delete_node(*fork)?; + edit.delete_node(*join) + }); + } + } +} diff --git a/juno_samples/nested_ccp/Cargo.toml b/juno_samples/control/Cargo.toml similarity index 88% rename from juno_samples/nested_ccp/Cargo.toml rename to juno_samples/control/Cargo.toml index 5ee3f747477c901806642818553e94b046b50242..4a8823276e007089ca61e859995df9c7df1979b4 100644 --- a/juno_samples/nested_ccp/Cargo.toml +++ b/juno_samples/control/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "juno_nested_ccp" +name = "juno_control" version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" [[bin]] -name = "juno_nested_ccp" +name = "juno_control" path = "src/main.rs" [features] diff --git a/juno_samples/nested_ccp/build.rs b/juno_samples/control/build.rs similarity index 81% rename from juno_samples/nested_ccp/build.rs rename to juno_samples/control/build.rs index 074937e7b0a0ce50032928f30c72feb39b5ecd79..33f9f18b4012c523aac42285056b0860ca8a82d8 100644 --- a/juno_samples/nested_ccp/build.rs +++ b/juno_samples/control/build.rs @@ -4,7 +4,7 @@ fn main() { #[cfg(not(feature = "cuda"))] { JunoCompiler::new() - .file_in_src("nested_ccp.jn") + .file_in_src("control.jn") .unwrap() .build() .unwrap(); @@ -12,7 +12,7 @@ fn main() { #[cfg(feature = "cuda")] { JunoCompiler::new() - .file_in_src("nested_ccp.jn") + .file_in_src("control.jn") .unwrap() .schedule_in_src("gpu.sch") .unwrap() diff --git a/juno_samples/nested_ccp/src/nested_ccp.jn b/juno_samples/control/src/control.jn similarity index 73% rename from juno_samples/nested_ccp/src/nested_ccp.jn rename to juno_samples/control/src/control.jn index aed4e29725a6ce7f7621d367c9523f5ec9a26f8d..d6687327110838dc1652f1e911f5163c5a3a568a 100644 --- a/juno_samples/nested_ccp/src/nested_ccp.jn +++ b/juno_samples/control/src/control.jn @@ -38,3 +38,20 @@ fn no_underflow() -> usize { return res; } + +#[entry] +fn useless_control(arg: f32) -> f32 { + let x : f32; + if arg > 0.0 { + x = 7.0; + } else { + x = 9.0 - 2.0; + } + for i = 0 to 1 { + x += arg; + } + for i = 0 to 10 { + x = 13.0; + } + return x; +} diff --git a/juno_samples/control/src/gpu.sch b/juno_samples/control/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..3ae9473d3c57b7ce765f7c3b1b0ce52f22072d7f --- /dev/null +++ b/juno_samples/control/src/gpu.sch @@ -0,0 +1,30 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.ccp_example, out.median_array, out.no_underflow, out.useless_control); + +ip-sroa(*); +sroa(*); +dce(*); +gvn(*); +dce(*); +phi-elim(*); +dce(*); +ccp(out.useless_control); +dce(out.useless_control); +simplify-cfg(out.useless_control); +dce(out.useless_control); +predication(out.useless_control); +dce(out.useless_control); +forkify(out.useless_control); +fork-guard-elim(out.useless_control); +fork-coalesce(out.useless_control); +dce(out.useless_control); +simplify-cfg(out.useless_control); +dce(out.useless_control); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/nested_ccp/src/main.rs b/juno_samples/control/src/main.rs similarity index 87% rename from juno_samples/nested_ccp/src/main.rs rename to juno_samples/control/src/main.rs index b364c03c4cddcc1fc94171ff55eb3c1ff00e5b3e..af3408edbcaf0b6b0c3dbdb0fc65f9a2a6032cd5 100644 --- a/juno_samples/nested_ccp/src/main.rs +++ b/juno_samples/control/src/main.rs @@ -4,7 +4,7 @@ use hercules_rt::CUDABox; use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; -juno_build::juno!("nested_ccp"); +juno_build::juno!("control"); fn main() { async_std::task::block_on(async { @@ -13,32 +13,39 @@ fn main() { #[cfg(not(feature = "cuda"))] { let a = HerculesCPURef::from_slice(&a); - let b = HerculesCPURefMut::from_slice(&mut b); let mut r = runner!(ccp_example); let output_example = r.run(a).await; + assert_eq!(output_example, 1.0); + + let b = HerculesCPURefMut::from_slice(&mut b); let mut r = runner!(median_array); let output_median = r.run(9, b).await; - assert_eq!(output_example, 1.0); assert_eq!(output_median, 18); } #[cfg(feature = "cuda")] { let mut a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); - let mut b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); let mut r = runner!(ccp_example); let output_example = r.run(a.get_ref_mut()).await; + assert_eq!(output_example, 1.0); + + let mut b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); let mut r = runner!(median_array); let output_median = r.run(9, b.get_ref_mut()).await; - assert_eq!(output_example, 1.0); assert_eq!(output_median, 18); } + let mut r = runner!(no_underflow); let out_no_underflow = r.run().await; assert_eq!(out_no_underflow, 7); + + let mut r = runner!(useless_control); + let out_useless_control = r.run(-1.0).await; + assert_eq!(out_useless_control, 13.0); }); } #[test] -fn nested_ccp_test() { +fn control_test() { main(); } diff --git a/juno_samples/nested_ccp/src/gpu.sch b/juno_samples/nested_ccp/src/gpu.sch deleted file mode 100644 index c56d046a686ea8bddd8c9e9f91d88eb39a2eaf31..0000000000000000000000000000000000000000 --- a/juno_samples/nested_ccp/src/gpu.sch +++ /dev/null @@ -1,17 +0,0 @@ -gvn(*); -phi-elim(*); -dce(*); - -let out = auto-outline(*); -gpu(out.ccp_example, out.median_array, out.no_underflow); - -ip-sroa(*); -sroa(*); -dce(*); -gvn(*); -phi-elim(*); -dce(*); - -infer-schedules(*); - -gcm(*); diff --git a/juno_samples/complex_tuple.jn b/juno_samples/unused/complex_tuple.jn similarity index 100% rename from juno_samples/complex_tuple.jn rename to juno_samples/unused/complex_tuple.jn diff --git a/juno_samples/cpu.sch b/juno_samples/unused/cpu.sch similarity index 100% rename from juno_samples/cpu.sch rename to juno_samples/unused/cpu.sch diff --git a/juno_samples/gpu.sch b/juno_samples/unused/gpu.sch similarity index 100% rename from juno_samples/gpu.sch rename to juno_samples/unused/gpu.sch diff --git a/juno_samples/intrinsics1.jn b/juno_samples/unused/intrinsics1.jn similarity index 100% rename from juno_samples/intrinsics1.jn rename to juno_samples/unused/intrinsics1.jn diff --git a/juno_samples/intrinsics2.jn b/juno_samples/unused/intrinsics2.jn similarity index 100% rename from juno_samples/intrinsics2.jn rename to juno_samples/unused/intrinsics2.jn diff --git a/juno_samples/intrinsics3.jn b/juno_samples/unused/intrinsics3.jn similarity index 100% rename from juno_samples/intrinsics3.jn rename to juno_samples/unused/intrinsics3.jn diff --git a/juno_samples/matadd.jn b/juno_samples/unused/matadd.jn similarity index 100% rename from juno_samples/matadd.jn rename to juno_samples/unused/matadd.jn diff --git a/juno_samples/nested_ccp.jn b/juno_samples/unused/nested_ccp.jn similarity index 100% rename from juno_samples/nested_ccp.jn rename to juno_samples/unused/nested_ccp.jn diff --git a/juno_samples/poly_matmul.jn b/juno_samples/unused/poly_matmul.jn similarity index 100% rename from juno_samples/poly_matmul.jn rename to juno_samples/unused/poly_matmul.jn diff --git a/juno_samples/products.jn b/juno_samples/unused/products.jn similarity index 100% rename from juno_samples/products.jn rename to juno_samples/unused/products.jn diff --git a/juno_samples/simple1.jn b/juno_samples/unused/simple1.jn similarity index 100% rename from juno_samples/simple1.jn rename to juno_samples/unused/simple1.jn diff --git a/juno_samples/simple1_withcall.jn b/juno_samples/unused/simple1_withcall.jn similarity index 100% rename from juno_samples/simple1_withcall.jn rename to juno_samples/unused/simple1_withcall.jn diff --git a/juno_samples/simple2.jn b/juno_samples/unused/simple2.jn similarity index 100% rename from juno_samples/simple2.jn rename to juno_samples/unused/simple2.jn diff --git a/juno_samples/test1.jn b/juno_samples/unused/test1.jn similarity index 100% rename from juno_samples/test1.jn rename to juno_samples/unused/test1.jn diff --git a/juno_samples/vecadd.jn b/juno_samples/unused/vecadd.jn similarity index 100% rename from juno_samples/vecadd.jn rename to juno_samples/unused/vecadd.jn diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs index 713c30d436fdb9316f92f7ecbeee3659f2924d83..123111b874c97110350b6e45ab9151f59a3aa1f0 100644 --- a/juno_scheduler/src/compile.rs +++ b/juno_scheduler/src/compile.rs @@ -115,6 +115,7 @@ impl FromStr for Appliable { "outline" => Ok(Appliable::Pass(ir::Pass::Outline)), "phi-elim" => Ok(Appliable::Pass(ir::Pass::PhiElim)), "predication" => Ok(Appliable::Pass(ir::Pass::Predication)), + "simplify-cfg" => Ok(Appliable::Pass(ir::Pass::SimplifyCFG)), "slf" | "store-load-forward" => Ok(Appliable::Pass(ir::Pass::SLF)), "sroa" => Ok(Appliable::Pass(ir::Pass::SROA)), "unforkify" => Ok(Appliable::Pass(ir::Pass::Unforkify)), diff --git a/juno_scheduler/src/default.rs b/juno_scheduler/src/default.rs index d1f139db58c944733d6cbbf43d0c52f651d72f23..2cd2c122aaaa7c06622bc30c3e3c662f673af9a1 100644 --- a/juno_scheduler/src/default.rs +++ b/juno_scheduler/src/default.rs @@ -46,6 +46,7 @@ pub fn default_schedule() -> ScheduleStmt { PhiElim, DCE, CCP, + SimplifyCFG, DCE, GVN, DCE, @@ -59,6 +60,7 @@ pub fn default_schedule() -> ScheduleStmt { Predication, DCE, CCP, + SimplifyCFG, DCE, GVN, DCE, @@ -66,18 +68,23 @@ pub fn default_schedule() -> ScheduleStmt { DCE, GVN, DCE, - // Forkify, - // ForkGuardElim, - // ForkCoalesce, + Forkify, + ForkGuardElim, + ForkCoalesce, + DCE, + SimplifyCFG, DCE, ForkSplit, Unforkify, - GVN, + CCP, + SimplifyCFG, DCE, + GVN, DCE, AutoOutline, InterproceduralSROA, SROA, + SimplifyCFG, InferSchedules, DCE, GCM, diff --git a/juno_scheduler/src/ir.rs b/juno_scheduler/src/ir.rs index 9e85509f83a322da222e56257ffffc697ab14e30..e8cc2d39ce43bed831acc0bf7ef4231a4ebd0149 100644 --- a/juno_scheduler/src/ir.rs +++ b/juno_scheduler/src/ir.rs @@ -23,6 +23,7 @@ pub enum Pass { Outline, PhiElim, Predication, + SimplifyCFG, SLF, SROA, Unforkify, diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 901361c6a438a90abde4ebccf8e85e2cda4bc564..7d9687f644c47a54ad657f322e00c527973b49af 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -1946,6 +1946,24 @@ fn run_pass( pm.delete_gravestones(); pm.clear_analyses(); } + Pass::SimplifyCFG => { + assert!(args.is_empty()); + pm.make_fork_join_maps(); + let fork_join_maps = pm.fork_join_maps.take().unwrap(); + + for (func, fork_join_map) in build_selection(pm, selection) + .into_iter() + .zip(fork_join_maps.iter()) + { + let Some(mut func) = func else { + continue; + }; + simplify_cfg(&mut func, fork_join_map); + changed |= func.modified(); + } + pm.delete_gravestones(); + pm.clear_analyses(); + } Pass::SROA => { assert!(args.is_empty()); pm.make_reverse_postorders();