From 72a61f40f290d9b5ced10b2c2caa406998b7c861 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 2 Mar 2025 13:15:40 -0600
Subject: [PATCH] Optimize srad

---
 juno_samples/rodinia/srad/src/gpu.sch | 35 ++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/juno_samples/rodinia/srad/src/gpu.sch b/juno_samples/rodinia/srad/src/gpu.sch
index f736c0b7..f89b7ab8 100644
--- a/juno_samples/rodinia/srad/src/gpu.sch
+++ b/juno_samples/rodinia/srad/src/gpu.sch
@@ -41,15 +41,26 @@ fork-tile[32, 0, false, true](sum_loop);
 let out = fork-split(sum_loop);
 clean-monoid-reduces(sum_loop);
 simpl!(sum_loop);
-let fission = fork-fission[out.srad_0.fj0](sum_loop);
+
+let fission1 = fork-fission[out.srad_0.fj0](sum_loop);
+simpl!(sum_loop);
+fork-tile[32, 0, false, true](fission1.srad_0.fj_bottom);
+let out = fork-split(fission1.srad_0.fj_bottom);
+clean-monoid-reduces(sum_loop);
+simpl!(sum_loop);
+
+let fission2 = fork-fission[out.srad_0.fj0](sum_loop);
 simpl!(sum_loop);
-fork-tile[32, 0, false, true](fission.srad_0.fj_bottom);
-let out = fork-split(fission.srad_0.fj_bottom);
+fork-tile[32, 0, false, true](fission2.srad_0.fj_bottom);
+let out = fork-split(fission2.srad_0.fj_bottom);
 clean-monoid-reduces(sum_loop);
 simpl!(sum_loop);
-let top = outline(fission.srad_0.fj_top);
-let bottom = outline(out.srad_0.fj0);
-gpu(top, bottom);
+
+let first = outline(fission1.srad_0.fj_top);
+let second = outline(fission2.srad_0.fj_top);
+let third = outline(out.srad_0.fj0);
+gpu(first, second, third);
+const-inline[false](*);
 ip-sroa(*);
 sroa(*);
 simpl!(*);
@@ -60,4 +71,16 @@ dce(main_loops);
 fork-split(main_loops);
 simpl!(main_loops);
 
+fork-dim-merge(extract);
+fork-tile[32, 0, false, true](extract);
+dce(extract);
+fork-split(extract);
+simpl!(extract);
+
+fork-dim-merge(compress);
+fork-tile[32, 0, false, true](compress);
+dce(compress);
+fork-split(compress);
+simpl!(compress);
+
 gcm(*);
-- 
GitLab