diff --git a/juno_samples/rodinia/srad/src/gpu.sch b/juno_samples/rodinia/srad/src/gpu.sch index f736c0b776759596cfc93a5025fb4f280c3fc932..f89b7ab8e372ad2148ff23ac41e203a4df9e0942 100644 --- a/juno_samples/rodinia/srad/src/gpu.sch +++ b/juno_samples/rodinia/srad/src/gpu.sch @@ -41,15 +41,26 @@ fork-tile[32, 0, false, true](sum_loop); let out = fork-split(sum_loop); clean-monoid-reduces(sum_loop); simpl!(sum_loop); -let fission = fork-fission[out.srad_0.fj0](sum_loop); + +let fission1 = fork-fission[out.srad_0.fj0](sum_loop); +simpl!(sum_loop); +fork-tile[32, 0, false, true](fission1.srad_0.fj_bottom); +let out = fork-split(fission1.srad_0.fj_bottom); +clean-monoid-reduces(sum_loop); +simpl!(sum_loop); + +let fission2 = fork-fission[out.srad_0.fj0](sum_loop); simpl!(sum_loop); -fork-tile[32, 0, false, true](fission.srad_0.fj_bottom); -let out = fork-split(fission.srad_0.fj_bottom); +fork-tile[32, 0, false, true](fission2.srad_0.fj_bottom); +let out = fork-split(fission2.srad_0.fj_bottom); clean-monoid-reduces(sum_loop); simpl!(sum_loop); -let top = outline(fission.srad_0.fj_top); -let bottom = outline(out.srad_0.fj0); -gpu(top, bottom); + +let first = outline(fission1.srad_0.fj_top); +let second = outline(fission2.srad_0.fj_top); +let third = outline(out.srad_0.fj0); +gpu(first, second, third); +const-inline[false](*); ip-sroa(*); sroa(*); simpl!(*); @@ -60,4 +71,16 @@ dce(main_loops); fork-split(main_loops); simpl!(main_loops); +fork-dim-merge(extract); +fork-tile[32, 0, false, true](extract); +dce(extract); +fork-split(extract); +simpl!(extract); + +fork-dim-merge(compress); +fork-tile[32, 0, false, true](compress); +dce(compress); +fork-split(compress); +simpl!(compress); + gcm(*);