From d303b1850dfd142871ee9cb819f0edff33334d6a Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Mon, 3 Mar 2025 13:35:02 -0600
Subject: [PATCH] fix syncthreads hack

---
 hercules_cg/src/gpu.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 3a00e547..1e6067a3 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -562,8 +562,9 @@ namespace cg = cooperative_groups;
      * and writes.
      */
     fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> {
+        write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?; // emitted `__shared__` scratch object, passed to this_thread_block below
         write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?;
-        write!(w, "\tcg::thread_block block = cg::this_thread_block();\n")?;
+        write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?; // `block` is now built from block_sync_shared
         Ok(())
     }
 
@@ -1322,8 +1323,8 @@ namespace cg = cooperative_groups;
                     )?;
                     write!(w, "{}\t*({} + i) = 0;\n", tabs, define_variable)?;
                     write!(w, "{}}}\n", tabs)?;
-                    //write!(w, "{}{}.sync();\n", tabs, cg_tile)?;
-                    write!(w, "__syncthreads\n")?;
+                    write!(w, "{}{}.sync();\n", tabs, cg_tile)?;
+                    // Sync through `cg_tile`; the previously emitted bare "__syncthreads" text had no call parentheses.
                 }
             }
             // Dynamic constants emitted at top
@@ -1783,8 +1784,8 @@ namespace cg = cooperative_groups;
                     }
                     let fork = self.join_fork_map.get(&id).unwrap();
                     let cg_tile_available = self.get_cg_tile(*fork, CGType::Available);
-                    //write!(w_term, "\t{}.sync();\n", cg_tile_available)?;
-                    write!(w_term, "\t__syncthreads;\n")?;
+                    write!(w_term, "\t{}.sync();\n", cg_tile_available)?;
+                    // Sync through the fork's `CGType::Available` tile; the old "__syncthreads;" text had no call parentheses.
                 }
                 // If the Fork was parallelized, each thread or UsedPerId tile of
                 // threads only runs one ThreadID, so we can jump straight to the
-- 
GitLab