From 84d2978f414a3ba57f70bd359753a3184eae4f49 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 2 Mar 2025 22:37:06 -0600 Subject: [PATCH] bfs w/ and reduction --- juno_samples/rodinia/bfs/src/gpu.sch | 50 +++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 6c4d027b..56489a23 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -10,14 +10,16 @@ macro simpl!(X) { phi-elim(bfs); no-memset(bfs@cost); -let cost_init = outline(bfs@cost_init); +let init = outline(bfs@cost_init); let loop1 = outline(bfs@loop1); let loop2 = outline(bfs@loop2); -gpu(loop1, loop2); +let loop3 = outline(bfs@loop3); +parallel-reduce(loop1); simpl!(*); predication(*); const-inline(*); +loop-bound-canon(*); simpl!(*); fixpoint { forkify(*); @@ -26,14 +28,44 @@ fixpoint { simpl!(*); predication(*); simpl!(*); - -unforkify(cost_init); -parallel-reduce(loop1); -forkify(*); -fork-guard-elim(*); -simpl!(*); -predication(*); reduce-slf(*); simpl!(*); +fork-tile[32, 0, false, true](loop1); +fork-split(loop1); +gpu(loop1); + +fixpoint { + forkify(loop2, loop3); + fork-guard-elim(loop2, loop3); +} + +simpl!(loop2, loop3); +fork-tile[32, 0, false, true](loop2, loop3); +let out = fork-split(loop2, loop3); +clean-monoid-reduces(loop2, loop3); +simpl!(loop2, loop3); +gpu(loop3); + +let fission1 = fork-fission[out.bfs_2.fj0](loop2); +simpl!(loop2); +fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom); +let out = fork-split(fission1.bfs_2.fj_bottom); +clean-monoid-reduces(loop2); +simpl!(loop2); +let fission2 = fork-fission[out.bfs_2.fj0](loop2); +simpl!(loop2); +fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom); +let out = fork-split(fission2.bfs_2.fj_bottom); +clean-monoid-reduces(loop2); +simpl!(loop2); +let top = outline(fission1.bfs_2.fj_top); +let middle = outline(fission2.bfs_2.fj_top); +let bottom = outline(out.bfs_2.fj0); +const-inline(loop2, top, middle, bottom); +no-memset(top, middle); +gpu(top, middle, bottom); +simpl!(loop2, top, middle, bottom); + +unforkify(init); gcm(*); -- GitLab