diff --git a/juno_samples/rodinia/srad/src/gpu.sch b/juno_samples/rodinia/srad/src/gpu.sch index f7885f9b2e9ed693054be3166a4ca6c285aa8700..289548f9e01cdf402a3e1b1057fa52d4029f6173 100644 --- a/juno_samples/rodinia/srad/src/gpu.sch +++ b/juno_samples/rodinia/srad/src/gpu.sch @@ -9,9 +9,9 @@ macro simpl!(X) { } phi-elim(*); -let init_loop = outline(srad@loop1); +let sum_loop = outline(srad@loop1); let main_loops = outline(srad@loop2 | srad@loop3); -gpu(init_loop, main_loops, extract, compress); +gpu(main_loops, extract, compress); simpl!(*); const-inline[true](*); crc(*); @@ -35,4 +35,23 @@ simpl!(*); slf(*); simpl!(*); +fork-dim-merge(sum_loop); +simpl!(sum_loop); +fork-tile[32, 0, false, true](sum_loop); +let out = fork-split(sum_loop); +clean-monoid-reduces(sum_loop); +simpl!(sum_loop); +let fission = fork-fission[out.srad_0.fj0](sum_loop); +simpl!(sum_loop); +fork-tile[32, 0, false, true](fission.srad_0.fj_bottom); +let out = fork-split(fission.srad_0.fj_bottom); +clean-monoid-reduces(sum_loop); +simpl!(sum_loop); +let top = outline(fission.srad_0.fj_top); +let bottom = outline(out.srad_0.fj0); +gpu(top, bottom); +ip-sroa(*); +sroa(*); +simpl!(*); + gcm(*);