From a8af6c9bcec6102e327f0d76fc554892458a9aec Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 1 Mar 2025 16:00:59 -0600 Subject: [PATCH] Fix gpu backend, tile on gpu is good actually :shocked: --- hercules_cg/src/gpu.rs | 6 ++++-- juno_samples/cava/src/gpu.sch | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index c9720273..07dd3ebf 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -1225,11 +1225,13 @@ namespace cg = cooperative_groups; // because Fork basic block's init section already does gating write!( w, - "{}{} = (threadIdx.x % {}) / {};\n", + "{}{} = (((threadIdx.x % {}) / {}) / ({})) % ({});\n", tabs, define_variable, use_thread_quota.unwrap(), - use_thread_quota.unwrap() / parallel_factor.unwrap() + use_thread_quota.unwrap() / parallel_factor.unwrap(), + divide, + modulo, )?; } } diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index aa1df390..92a1835d 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -121,6 +121,12 @@ fork-tile[2, 0, false, true](fuse4@channel_loop); let out = fork-split(fuse4@channel_loop); fork-unroll(out.cava_3.fj1); unforkify(fuse4@channel_loop); +fork-tile[4, 1, false, true](fuse4); +fork-tile[8, 0, false, true](fuse4); +fork-interchange[1, 2](fuse4); +let split = fork-split(fuse4); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); no-memset(fuse5@res1); no-memset(fuse5@res2); -- GitLab