From 1c3bc6d57cb9ac7a93c78d7253af6868973b200c Mon Sep 17 00:00:00 2001 From: Russel Arbore <rarbore2@illinois.edu> Date: Wed, 5 Mar 2025 11:09:43 -0600 Subject: [PATCH] more opts --- hercules_opt/src/fork_transforms.rs | 1 + juno_samples/cava/src/cpu.sch | 2 ++ juno_samples/rodinia/bfs/src/cpu.sch | 14 ++++++++------ juno_samples/rodinia/bfs/src/gpu.sch | 11 +++-------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index 6998f879..c46e4e98 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -1741,6 +1741,7 @@ fn extend_fork(editor: &mut FunctionEditor, fork: NodeID, join: NodeID, multiple control: new_fork, dimension: idx, }); + edit.sub_edit(fork, tid); let old_bound = edit.add_node(Node::DynamicConstant { id: *old_factor }); edit.add_node(Node::Binary { op: BinaryOperator::LT, diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index ada2f552..32e2b63b 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -124,6 +124,8 @@ if !feature("seq") { fork-coalesce(fuse4, fuse4_body); simpl!(fuse4, fuse4_body); fuse4 = fuse4_body; +} else { + fork-tile[6, 0, false, true](fuse4@channel_loop); } no-memset(fuse5@res1); diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch index ea6f0403..f564cd36 100644 --- a/juno_samples/rodinia/bfs/src/cpu.sch +++ b/juno_samples/rodinia/bfs/src/cpu.sch @@ -41,23 +41,25 @@ parallel-fork(traverse, collect); parallel-reduce(traverse, collect); if !feature("seq") { - fork-tile[32, 0, false, true](traverse, collect); - let (outer, inner) = fork-reshape[[1], [0]](traverse); + fork-tile[32, 0, false, false](traverse, collect); + let (outer, inner) = fork-reshape[[0], [1]](traverse); traverse = outline(inner); - let (outer, inner) = fork-reshape[[1], [0]](collect); + let (outer, inner) = fork-reshape[[0], [1]](collect); collect = outline(inner); - fork-tile[32, 0, false, true](init); - let (outer, inner) = fork-reshape[[1], [0]](init); + fork-tile[32, 0, false, false](init); + let (outer, inner) = fork-reshape[[0], [1]](init); let init_body = outline(inner); inline(bfs@cost_init, bfs@loop1, bfs@loop2); init = init_body; } +fork-tile[8, 0, false, true](init, traverse, collect); delete-uncalled(*); const-inline(*); simpl!(*); +fork-split(init, traverse, collect); unforkify(init, traverse, collect); simpl!(*); -gcm(*); \ No newline at end of file +gcm(*); diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 0253a021..541d15d7 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -15,7 +15,7 @@ let traverse = outline(bfs@loop1); let collect = outline(bfs@loop2); parallel-reduce(traverse, collect); no-memset(make_stop_prod); -gpu(traverse, make_stop_prod, collect); +gpu(init, traverse, make_stop_prod, collect); simpl!(*); predication(*); @@ -38,12 +38,7 @@ fixpoint { } simpl!(collect); -fork-tile[32, 0, false, true](init); -let (outer, inner) = fork-reshape[[1], [0]](init); -let init_body = outline(inner); +fork-tile[1024, 0, false, true](init, traverse, collect); +fork-split(init, traverse, collect); -fork-tile[1024, 0, false, true](traverse, collect); -fork-split(traverse, collect); - -unforkify(init_body); gcm(*); -- GitLab