# Patch summary (free-form preamble; text before the "diff --git" header is
# not part of any hunk).
#
# Target: juno_samples/rodinia/srad/src/gpu.sch
#
# Hunk 1:
#   * The result of outline(srad@loop1) is now bound to `sum_loop` instead of
#     `init_loop`.
#   * That outlined function is no longer passed to the first gpu(...) call;
#     the call now lists only main_loops, extract and compress.
#
# Hunk 2 inserts a new pass sequence between the existing simpl!(*) and
# gcm(*), operating on `sum_loop`:
#   1. fork-dim-merge, then simpl!.
#   2. fork-tile[32, 0, false, true] followed by fork-split (result bound to
#      `out`), then clean-monoid-reduces and simpl!.
#   3. fork-fission keyed on out.srad_0.fj0 (result bound to `fission`),
#      then simpl!.
#   4. A second fork-tile[32, 0, false, true] / fork-split, this time on
#      fission.srad_0.fj_bottom, then clean-monoid-reduces and simpl!.
#   5. fission.srad_0.fj_top and out.srad_0.fj0 are outlined as `top` and
#      `bottom`, and both are handed to gpu(...).
#   6. ip-sroa, sroa and simpl! are run over everything (*).
#
# NOTE(review): `out` is bound twice (first and second fork-split). The
#   outline(out.srad_0.fj0) that produces `bottom` comes after the second
#   binding, so it presumably refers to the split of fj_bottom rather than
#   the first split -- confirm that rebinding/shadowing is intended.
# NOTE(review): the meaning of the fork-tile arguments [32, 0, false, true]
#   is not visible in this patch; check the scheduler's pass documentation
#   before changing them.
# NOTE(review): the overall effect looks like staging the loop1 computation
#   as two separately outlined GPU functions (top/bottom) -- TODO confirm
#   against the srad source.
diff --git a/juno_samples/rodinia/srad/src/gpu.sch b/juno_samples/rodinia/srad/src/gpu.sch
index f7885f9b2e9ed693054be3166a4ca6c285aa8700..289548f9e01cdf402a3e1b1057fa52d4029f6173 100644
--- a/juno_samples/rodinia/srad/src/gpu.sch
+++ b/juno_samples/rodinia/srad/src/gpu.sch
@@ -9,9 +9,9 @@ macro simpl!(X) {
 }
 
 phi-elim(*);
-let init_loop = outline(srad@loop1);
+let sum_loop = outline(srad@loop1);
 let main_loops = outline(srad@loop2 | srad@loop3);
-gpu(init_loop, main_loops, extract, compress);
+gpu(main_loops, extract, compress);
 simpl!(*);
 const-inline[true](*);
 crc(*);
@@ -35,4 +35,23 @@ simpl!(*);
 slf(*);
 simpl!(*);
 
+fork-dim-merge(sum_loop);
+simpl!(sum_loop);
+fork-tile[32, 0, false, true](sum_loop);
+let out = fork-split(sum_loop);
+clean-monoid-reduces(sum_loop);
+simpl!(sum_loop);
+let fission = fork-fission[out.srad_0.fj0](sum_loop);
+simpl!(sum_loop);
+fork-tile[32, 0, false, true](fission.srad_0.fj_bottom);
+let out = fork-split(fission.srad_0.fj_bottom);
+clean-monoid-reduces(sum_loop);
+simpl!(sum_loop);
+let top = outline(fission.srad_0.fj_top);
+let bottom = outline(out.srad_0.fj0);
+gpu(top, bottom);
+ip-sroa(*);
+sroa(*);
+simpl!(*);
+
 gcm(*);