From 634e17e9bf1aa169b384c06d0f7fb3591a0afa10 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Mon, 3 Mar 2025 16:39:40 -0600
Subject: [PATCH] More parallelism for backprop

---
 juno_samples/rodinia/backprop/src/cpu.sch | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch
index 9a5f4d75..3c7f7d5f 100644
--- a/juno_samples/rodinia/backprop/src/cpu.sch
+++ b/juno_samples/rodinia/backprop/src/cpu.sch
@@ -42,12 +42,25 @@ let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_i
 let forward_input = outline(inner);
 inline(backprop@forward_input);
 
+// The first call to adjust_weights has total loop dimensions of 1 * 17, so it
+// is not worth parallelizing (given that the body is trivial).
+// The second call to adjust_weights has total loop dimensions of
+// 16 * (input + 1), which is worth parallelizing; we tile it by 16.
+inline(backprop@adjust_hidden, backprop@adjust_input);
+let adjust_hidden = outline(backprop@adjust_hidden);
+let adjust_input = outline(backprop@adjust_input);
+
+fork-tile[16, 0, false, true](adjust_input);
+let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
+let adjust_input = outline(inner);
+inline(backprop@adjust_input);
+
 delete-uncalled(*);
 const-inline(*);
 
 simpl!(*);
 fork-split(*);
-unforkify(output_error, hidden_error, adjust_weights, forward_hidden, forward_input);
+unforkify(output_error, hidden_error, adjust_hidden, adjust_input, forward_hidden, forward_input);
 
 simpl!(*);
 gcm(*);
-- 
GitLab