diff --git a/Cargo.lock b/Cargo.lock index 49630436a0f0b90d8252824046c29f0e18b78af2..ad69bc729a16f266c266174de821cf4ad4071adc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1181,6 +1181,17 @@ dependencies = [ "with_builtin_macros", ] +[[package]] +name = "juno_test" +version = "0.1.0" +dependencies = [ + "async-std", + "hercules_rt", + "juno_build", + "rand", + "with_builtin_macros", +] + [[package]] name = "juno_utils" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index ced011a96c96793891228876314debeabcb561ed..46fc7eaa9448d1fd28d329aa4a2f2621efb38068 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ members = [ "hercules_samples/ccp", "juno_samples/simple3", - "juno_samples/patterns", + "juno_samples/patterns", "juno_samples/matmul", "juno_samples/casts_and_intrinsics", "juno_samples/nested_ccp", @@ -30,4 +30,5 @@ members = [ "juno_samples/cava", "juno_samples/concat", "juno_samples/schedule_test", + "juno_samples/test", ] diff --git a/hercules_opt/src/forkify.rs b/hercules_opt/src/forkify.rs index ec4e9fbcc22d9f1c8a53652173706b40c5b12e65..0f06627d3c8bb53e20051c48c9ba0bd9385402a5 100644 --- a/hercules_opt/src/forkify.rs +++ b/hercules_opt/src/forkify.rs @@ -152,6 +152,7 @@ pub fn forkify_loop( .filter(|id| !l.control[id.idx()]) .collect(); + // FIXME: @xrouth if loop_preds.len() != 1 { return false; } @@ -388,6 +389,7 @@ nest! { is_associative: bool, }, LoopDependant(NodeID), + ControlDependant(NodeID), // This phi is redcutionable, but its cycle might depend on internal control within the loop. UsedByDependant(NodeID), } } @@ -398,6 +400,7 @@ impl LoopPHI { LoopPHI::Reductionable { phi, .. } => *phi, LoopPHI::LoopDependant(node_id) => *node_id, LoopPHI::UsedByDependant(node_id) => *node_id, + LoopPHI::ControlDependant(node_id) => *node_id, } } } @@ -415,6 +418,9 @@ pub fn analyze_phis<'a>( loop_nodes: &'a HashSet<NodeID>, ) -> impl Iterator<Item = LoopPHI> + 'a { + // We are also moving the phi from the top of the loop (the header), + // to the very end (the join). If there are uses of the phi somewhere in the loop, + // then they may try to use the phi (now a reduce) before it hits the join. // Find data cycles within the loop of this phi, // Start from the phis loop_continue_latch, and walk its uses until we find the original phi. @@ -509,6 +515,12 @@ pub fn analyze_phis<'a>( // to have headers that postdominate the loop continue latch. The value of the PHI used needs to be defined // by the time the reduce is triggered (at the end of the loop's internal control). + // If anything in the intersection is a phi (that isn't this own phi), then the reduction cycle depends on control. + // Which is not allowed. + if intersection.iter().any(|cycle_node| editor.node(cycle_node).is_phi() && *cycle_node != *phi) || editor.node(loop_continue_latch).is_phi() { + return LoopPHI::ControlDependant(*phi); + } + // No nodes in data cycles with this phi (in the loop) are used outside the loop, besides the loop_continue_latch. // If some other node in the cycle is used, there is not a valid node to assign it after making the cycle a reduce. if intersection diff --git a/hercules_opt/src/unforkify.rs b/hercules_opt/src/unforkify.rs index 85ffd233dad79ca3339525cdf4542493d8c20124..7d158d1a0378cac55c24533a96ccd36610c8bfeb 100644 --- a/hercules_opt/src/unforkify.rs +++ b/hercules_opt/src/unforkify.rs @@ -133,7 +133,7 @@ pub fn unforkify( if factors.len() > 1 { // For now, don't convert multi-dimensional fork-joins. Rely on pass // that splits fork-joins. - continue; + break; // Because we have to unforkify top down, we can't unforkify forks that are contained } let join_control = nodes[join.idx()].try_join().unwrap(); let tids: Vec<_> = editor diff --git a/hercules_test/hercules_tests/tests/loop_tests.rs b/hercules_test/hercules_tests/tests/loop_tests.rs index 5832a161a18f18ea43860c1c5d6364385d0f187f..192c136622c2f68315daed21b7031455ccab9201 100644 --- a/hercules_test/hercules_tests/tests/loop_tests.rs +++ b/hercules_test/hercules_tests/tests/loop_tests.rs @@ -401,7 +401,7 @@ fn matmul_pipeline() { let dyn_consts = [I, J, K]; // FIXME: This path should not leave the crate - let mut module = parse_module_from_hbin("../../juno_samples/matmul/out.hbin"); + let mut module = parse_module_from_hbin("../../juno_samples/test/out.hbin"); // let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect(); for i in 0..I { @@ -425,7 +425,7 @@ fn matmul_pipeline() { }; assert_eq!(correct_c[0], value); - let schedule = Some(default_schedule![Xdot, ForkSplit, Unforkify, Xdot,]); + let schedule = Some(default_schedule![AutoOutline, InterproceduralSROA, SROA, InferSchedules, DCE, Xdot, GCM]); module = run_schedule_on_hercules(module, schedule).unwrap(); diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 2371e0f20e776e97d39596f656594fc679065264..d2772c71971638205613dc4c92eb7ac835b02983 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -1471,29 +1471,38 @@ fn run_pass( } Pass::Forkify => { assert!(args.is_empty()); - pm.make_fork_join_maps(); - pm.make_control_subgraphs(); - pm.make_loops(); - let fork_join_maps = pm.fork_join_maps.take().unwrap(); - let loops = pm.loops.take().unwrap(); - let control_subgraphs = pm.control_subgraphs.take().unwrap(); - for (((func, fork_join_map), loop_nest), control_subgraph) in - build_selection(pm, selection) - .into_iter() - .zip(fork_join_maps.iter()) - .zip(loops.iter()) - .zip(control_subgraphs.iter()) - { - let Some(mut func) = func else { - continue; - }; - // TODO: uses direct return from forkify for now instead of - // func.modified, see comment on top of `forkify` for why. Fix - // this eventually. - changed |= forkify(&mut func, control_subgraph, fork_join_map, loop_nest); + loop { + let mut inner_changed = false; + pm.make_fork_join_maps(); + pm.make_control_subgraphs(); + pm.make_loops(); + let fork_join_maps = pm.fork_join_maps.take().unwrap(); + let loops = pm.loops.take().unwrap(); + let control_subgraphs = pm.control_subgraphs.take().unwrap(); + for (((func, fork_join_map), loop_nest), control_subgraph) in + build_selection(pm, selection.clone()) + .into_iter() + .zip(fork_join_maps.iter()) + .zip(loops.iter()) + .zip(control_subgraphs.iter()) + { + let Some(mut func) = func else { + continue; + }; + // TODO: uses direct return from forkify for now instead of + // func.modified, see comment on top of `forkify` for why. Fix + // this eventually. + let c = forkify(&mut func, control_subgraph, fork_join_map, loop_nest); + changed |= c; + inner_changed |= c; + } + pm.delete_gravestones(); + pm.clear_analyses(); + + if !inner_changed { + break; + } } - pm.delete_gravestones(); - pm.clear_analyses(); } Pass::GCM => { assert!(args.is_empty());