From 1c3bc6d57cb9ac7a93c78d7253af6868973b200c Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Wed, 5 Mar 2025 11:09:43 -0600
Subject: [PATCH] more opts: fork tiling in cava and bfs schedules

---
 hercules_opt/src/fork_transforms.rs  |  1 +
 juno_samples/cava/src/cpu.sch        |  2 ++
 juno_samples/rodinia/bfs/src/cpu.sch | 14 ++++++++------
 juno_samples/rodinia/bfs/src/gpu.sch | 11 +++--------
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs
index 6998f879..c46e4e98 100644
--- a/hercules_opt/src/fork_transforms.rs
+++ b/hercules_opt/src/fork_transforms.rs
@@ -1741,6 +1741,7 @@ fn extend_fork(editor: &mut FunctionEditor, fork: NodeID, join: NodeID, multiple
                     control: new_fork,
                     dimension: idx,
                 });
+                edit.sub_edit(fork, tid);
                 let old_bound = edit.add_node(Node::DynamicConstant { id: *old_factor });
                 edit.add_node(Node::Binary {
                     op: BinaryOperator::LT,
diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index ada2f552..32e2b63b 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -124,6 +124,8 @@ if !feature("seq") {
   fork-coalesce(fuse4, fuse4_body);
   simpl!(fuse4, fuse4_body);
   fuse4 = fuse4_body;
+} else {
+  fork-tile[6, 0, false, true](fuse4@channel_loop);
 }
 
 no-memset(fuse5@res1);
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index ea6f0403..f564cd36 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -41,23 +41,25 @@ parallel-fork(traverse, collect);
 parallel-reduce(traverse, collect);
 
 if !feature("seq") {
-  fork-tile[32, 0, false, true](traverse, collect);
-  let (outer, inner) = fork-reshape[[1], [0]](traverse);
+  fork-tile[32, 0, false, false](traverse, collect);
+  let (outer, inner) = fork-reshape[[0], [1]](traverse);
   traverse = outline(inner);
-  let (outer, inner) = fork-reshape[[1], [0]](collect);
+  let (outer, inner) = fork-reshape[[0], [1]](collect);
   collect = outline(inner);
 
-  fork-tile[32, 0, false, true](init);
-  let (outer, inner) = fork-reshape[[1], [0]](init);
+  fork-tile[32, 0, false, false](init);
+  let (outer, inner) = fork-reshape[[0], [1]](init);
   let init_body = outline(inner);
 
   inline(bfs@cost_init, bfs@loop1, bfs@loop2);
   init = init_body;
 }
+fork-tile[8, 0, false, true](init, traverse, collect);
 delete-uncalled(*);
 const-inline(*);
 simpl!(*);
 
+fork-split(init, traverse, collect);
 unforkify(init, traverse, collect);
 simpl!(*);
-gcm(*);
\ No newline at end of file
+gcm(*);
diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 0253a021..541d15d7 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -15,7 +15,7 @@ let traverse = outline(bfs@loop1);
 let collect = outline(bfs@loop2);
 parallel-reduce(traverse, collect);
 no-memset(make_stop_prod);
-gpu(traverse, make_stop_prod, collect);
+gpu(init, traverse, make_stop_prod, collect);
 
 simpl!(*);
 predication(*);
@@ -38,12 +38,7 @@ fixpoint {
 }
 simpl!(collect);
 
-fork-tile[32, 0, false, true](init);
-let (outer, inner) = fork-reshape[[1], [0]](init);
-let init_body = outline(inner);
+fork-tile[1024, 0, false, true](init, traverse, collect);
+fork-split(init, traverse, collect);
 
-fork-tile[1024, 0, false, true](traverse, collect);
-fork-split(traverse, collect);
-
-unforkify(init_body);
 gcm(*);
-- 
GitLab