From 7d59b63ca1517086c4fe5b4bc60ffcc64205fe3f Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Tue, 4 Mar 2025 13:29:41 -0600
Subject: [PATCH] fix backprop for real: GPU fork-iter codegen, schedule tiling, larger bench

---
 hercules_cg/src/gpu.rs                              | 13 ++++++++++---
 .../rodinia/backprop/benches/backprop_bench.rs      |  2 +-
 juno_samples/rodinia/backprop/src/gpu.sch           |  5 +++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 1e6067a3..dd87acbe 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -562,9 +562,15 @@ namespace cg = cooperative_groups;
      * and writes.
      */
     fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
-        write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?;
+        write!(
+            w,
+            "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n"
+        )?;
         write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
-        write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?;
+        write!(
+            w,
+            "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n"
+        )?;
         Ok(())
     }
 
@@ -1726,7 +1732,8 @@ namespace cg = cooperative_groups;
                         available_thread_quota, cg_tile_available, available_thread_quota
                     )?;
                     if parallel_factor.is_none() {
-                        write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?;
+                        write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?;
+                        write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?;
                         write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?;
                     }
                 }
diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
index 80964c72..492bce3b 100644
--- a/juno_samples/rodinia/backprop/benches/backprop_bench.rs
+++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
@@ -68,7 +68,7 @@ fn backprop_bench(c: &mut Criterion) {
     };
 
     bench("backprop bench small", 65536);
-    bench("backprop bench large", 1048576);
+    bench("backprop bench large", 33554432);
 }
 
 criterion_group!(benches, backprop_bench);
diff --git a/juno_samples/rodinia/backprop/src/gpu.sch b/juno_samples/rodinia/backprop/src/gpu.sch
index cc9cc2ac..1773f713 100644
--- a/juno_samples/rodinia/backprop/src/gpu.sch
+++ b/juno_samples/rodinia/backprop/src/gpu.sch
@@ -33,10 +33,11 @@ fixpoint {
 reduce-slf(*);
 simpl!(*);
 
-fork-extend[1024](layer_forward@inner_loop);
+fork-extend[32768](layer_forward@inner_loop);
 clean-monoid-reduces(layer_forward);
 simpl!(layer_forward);
-fork-tile[1024, 0, false, true](layer_forward@inner_loop);
+fork-tile[32768, 0, false, true](layer_forward@inner_loop);
+fork-tile[1024, 1, false, true](layer_forward@inner_loop);
 clean-monoid-reduces(layer_forward);
 let out = fork-split(layer_forward@inner_loop);
 clean-monoid-reduces(layer_forward);
-- 
GitLab