From 84d2978f414a3ba57f70bd359753a3184eae4f49 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 2 Mar 2025 22:37:06 -0600
Subject: [PATCH] bfs w/ and reduction

---
 juno_samples/rodinia/bfs/src/gpu.sch | 50 +++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 6c4d027b..56489a23 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -10,14 +10,16 @@ macro simpl!(X) {
 
 phi-elim(bfs);
 no-memset(bfs@cost);
-let cost_init = outline(bfs@cost_init);
+let init = outline(bfs@cost_init);
 let loop1 = outline(bfs@loop1);
 let loop2 = outline(bfs@loop2);
-gpu(loop1, loop2);
+let loop3 = outline(bfs@loop3);
+parallel-reduce(loop1);
 
 simpl!(*);
 predication(*);
 const-inline(*);
+loop-bound-canon(*);
 simpl!(*);
 fixpoint {
   forkify(*);
@@ -26,14 +28,44 @@ fixpoint {
 simpl!(*);
 predication(*);
 simpl!(*);
-
-unforkify(cost_init);
-parallel-reduce(loop1);
-forkify(*);
-fork-guard-elim(*);
-simpl!(*);
-predication(*);
 reduce-slf(*);
 simpl!(*);
 
+fork-tile[32, 0, false, true](loop1);
+fork-split(loop1);
+gpu(loop1);
+
+fixpoint {
+  forkify(loop2, loop3);
+  fork-guard-elim(loop2, loop3);
+}
+
+simpl!(loop2, loop3);
+fork-tile[32, 0, false, true](loop2, loop3);
+let out = fork-split(loop2, loop3);
+clean-monoid-reduces(loop2, loop3);
+simpl!(loop2, loop3);
+gpu(loop3);
+
+let fission1 = fork-fission[out.bfs_2.fj0](loop2);
+simpl!(loop2);
+fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
+let out = fork-split(fission1.bfs_2.fj_bottom);
+clean-monoid-reduces(loop2);
+simpl!(loop2);
+let fission2 = fork-fission[out.bfs_2.fj0](loop2);
+simpl!(loop2);
+fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
+let out = fork-split(fission2.bfs_2.fj_bottom);
+clean-monoid-reduces(loop2);
+simpl!(loop2);
+let top = outline(fission1.bfs_2.fj_top);
+let middle = outline(fission2.bfs_2.fj_top);
+let bottom = outline(out.bfs_2.fj0);
+const-inline(loop2, top, middle, bottom);
+no-memset(top, middle);
+gpu(top, middle, bottom);
+simpl!(loop2, top, middle, bottom);
+
+unforkify(init);
 gcm(*);
-- 
GitLab