From a8af6c9bcec6102e327f0d76fc554892458a9aec Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sat, 1 Mar 2025 16:00:59 -0600
Subject: [PATCH] Fix GPU backend; tiling on the GPU is actually good :shocked:

---
 hercules_cg/src/gpu.rs        | 6 ++++--
 juno_samples/cava/src/gpu.sch | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index c9720273..07dd3ebf 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -1225,11 +1225,13 @@ namespace cg = cooperative_groups;
                             // because Fork basic block's init section already does gating
                             write!(
                                 w,
-                                "{}{} = (threadIdx.x % {}) / {};\n",
+                                "{}{} = (((threadIdx.x % {}) / {}) / ({})) % ({});\n",
                                 tabs,
                                 define_variable,
                                 use_thread_quota.unwrap(),
-                                use_thread_quota.unwrap() / parallel_factor.unwrap()
+                                use_thread_quota.unwrap() / parallel_factor.unwrap(),
+                                divide,
+                                modulo,
                             )?;
                         }
                     }
diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch
index aa1df390..92a1835d 100644
--- a/juno_samples/cava/src/gpu.sch
+++ b/juno_samples/cava/src/gpu.sch
@@ -121,6 +121,12 @@ fork-tile[2, 0, false, true](fuse4@channel_loop);
 let out = fork-split(fuse4@channel_loop);
 fork-unroll(out.cava_3.fj1);
 unforkify(fuse4@channel_loop);
+fork-tile[4, 1, false, true](fuse4);
+fork-tile[8, 0, false, true](fuse4);
+fork-interchange[1, 2](fuse4);
+let split = fork-split(fuse4);
+fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
+fork-coalesce(split.cava_3.fj2);
 
 no-memset(fuse5@res1);
 no-memset(fuse5@res2);
-- 
GitLab