From 634e17e9bf1aa169b384c06d0f7fb3591a0afa10 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Mon, 3 Mar 2025 16:39:40 -0600
Subject: [PATCH] More parallelism for backprop

---
 juno_samples/rodinia/backprop/src/cpu.sch | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch
index 9a5f4d75..3c7f7d5f 100644
--- a/juno_samples/rodinia/backprop/src/cpu.sch
+++ b/juno_samples/rodinia/backprop/src/cpu.sch
@@ -42,12 +42,25 @@ let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_i
 let forward_input = outline(inner);
 inline(backprop@forward_input);
 
+// The first call to adjust_weights has total loop dimensions of 1 * 17, so it
+// is not worth parallelizing (given that the body is trivial).
+// The second call to adjust_weights has total loop dimensions of
+// 16 * (input + 1), which is worth parallelizing; we parallelize it by 16.
+inline(backprop@adjust_hidden, backprop@adjust_input);
+let adjust_hidden = outline(backprop@adjust_hidden);
+let adjust_input = outline(backprop@adjust_input);
+
+fork-tile[16, 0, false, true](adjust_input);
+let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
+let adjust_input = outline(inner);
+inline(backprop@adjust_input);
+
 delete-uncalled(*);
 const-inline(*);
 
 simpl!(*);
 fork-split(*);
-unforkify(output_error, hidden_error, adjust_weights, forward_hidden, forward_input);
+unforkify(output_error, hidden_error, adjust_hidden, adjust_input, forward_hidden, forward_input);
 simpl!(*);
 
 gcm(*);
-- 
GitLab