From 7d59b63ca1517086c4fe5b4bc60ffcc64205fe3f Mon Sep 17 00:00:00 2001 From: Russel Arbore <rarbore2@illinois.edu> Date: Tue, 4 Mar 2025 13:29:41 -0600 Subject: [PATCH] fix backprop for real --- hercules_cg/src/gpu.rs | 13 ++++++++++--- .../rodinia/backprop/benches/backprop_bench.rs | 2 +- juno_samples/rodinia/backprop/src/gpu.sch | 5 +++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 1e6067a3..dd87acbe 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -562,9 +562,15 @@ namespace cg = cooperative_groups; * and writes. */ fn codegen_helpers(&self, w: &mut String) -> Result<(), Error> { - write!(w, "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n")?; + write!( + w, + "\t__shared__ cg::experimental::block_tile_memory<1024> block_sync_shared;\n" + )?; write!(w, "\tcg::grid_group grid = cg::this_grid();\n")?; - write!(w, "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n")?; + write!( + w, + "\tcg::thread_block block = cg::experimental::this_thread_block(block_sync_shared);\n" + )?; Ok(()) } @@ -1726,7 +1732,8 @@ namespace cg = cooperative_groups; available_thread_quota, cg_tile_available, available_thread_quota )?; if parallel_factor.is_none() { - write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, true))?; + write!(thread_block_tiles, "\t{};\n", self.get_fork_iter(id, true))?; + write!(w_init, "\t{} = 0;\n", self.get_fork_iter(id, false))?; write!(w_init, "\tgoto {};\n", self.get_block_name(id, true))?; } } diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs index 80964c72..492bce3b 100644 --- a/juno_samples/rodinia/backprop/benches/backprop_bench.rs +++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs @@ -68,7 +68,7 @@ fn backprop_bench(c: &mut Criterion) { }; bench("backprop bench small", 65536); - bench("backprop bench large", 1048576); + bench("backprop bench large", 33554432); } criterion_group!(benches, backprop_bench); diff --git a/juno_samples/rodinia/backprop/src/gpu.sch b/juno_samples/rodinia/backprop/src/gpu.sch index cc9cc2ac..1773f713 100644 --- a/juno_samples/rodinia/backprop/src/gpu.sch +++ b/juno_samples/rodinia/backprop/src/gpu.sch @@ -33,10 +33,11 @@ fixpoint { reduce-slf(*); simpl!(*); -fork-extend[1024](layer_forward@inner_loop); +fork-extend[32768](layer_forward@inner_loop); clean-monoid-reduces(layer_forward); simpl!(layer_forward); -fork-tile[1024, 0, false, true](layer_forward@inner_loop); +fork-tile[32768, 0, false, true](layer_forward@inner_loop); +fork-tile[1024, 1, false, true](layer_forward@inner_loop); clean-monoid-reduces(layer_forward); let out = fork-split(layer_forward@inner_loop); clean-monoid-reduces(layer_forward); -- GitLab