From d173f53bfae51bf9b27e795104eaa4e18b537acc Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Thu, 27 Feb 2025 16:55:11 -0600
Subject: [PATCH] fused sum reduction is very fast

---
 juno_samples/rodinia/srad/src/gpu.sch | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/juno_samples/rodinia/srad/src/gpu.sch b/juno_samples/rodinia/srad/src/gpu.sch
index f7885f9b..289548f9 100644
--- a/juno_samples/rodinia/srad/src/gpu.sch
+++ b/juno_samples/rodinia/srad/src/gpu.sch
@@ -9,9 +9,9 @@ macro simpl!(X) {
 }
 
 phi-elim(*);
-let init_loop = outline(srad@loop1);
+let sum_loop = outline(srad@loop1);
 let main_loops = outline(srad@loop2 | srad@loop3);
-gpu(init_loop, main_loops, extract, compress);
+gpu(main_loops, extract, compress);
 simpl!(*);
 const-inline[true](*);
 crc(*);
@@ -35,4 +35,23 @@ simpl!(*);
 slf(*);
 simpl!(*);
 
+fork-dim-merge(sum_loop);
+simpl!(sum_loop);
+fork-tile[32, 0, false, true](sum_loop);
+let out = fork-split(sum_loop);
+clean-monoid-reduces(sum_loop);
+simpl!(sum_loop);
+let fission = fork-fission[out.srad_0.fj0](sum_loop);
+simpl!(sum_loop);
+fork-tile[32, 0, false, true](fission.srad_0.fj_bottom);
+let out = fork-split(fission.srad_0.fj_bottom);
+clean-monoid-reduces(sum_loop);
+simpl!(sum_loop);
+let top = outline(fission.srad_0.fj_top);
+let bottom = outline(out.srad_0.fj0);
+gpu(top, bottom);
+ip-sroa(*);
+sroa(*);
+simpl!(*);
+
 gcm(*);
-- 
GitLab