Skip to content
Snippets Groups Projects
Commit d173f53b authored by rarbore2, committed by Russel Arbore
Browse files

fused sum reduction is very fast

parent 5bfc11c2
No related branches found
No related tags found
1 merge request: !203 "More rodinia optimization"
......@@ -9,9 +9,9 @@ macro simpl!(X) {
}
phi-elim(*);
let init_loop = outline(srad@loop1);
let sum_loop = outline(srad@loop1);
let main_loops = outline(srad@loop2 | srad@loop3);
gpu(init_loop, main_loops, extract, compress);
gpu(main_loops, extract, compress);
simpl!(*);
const-inline[true](*);
crc(*);
......@@ -35,4 +35,23 @@ simpl!(*);
slf(*);
simpl!(*);
fork-dim-merge(sum_loop);
simpl!(sum_loop);
fork-tile[32, 0, false, true](sum_loop);
let out = fork-split(sum_loop);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
let fission = fork-fission[out.srad_0.fj0](sum_loop);
simpl!(sum_loop);
fork-tile[32, 0, false, true](fission.srad_0.fj_bottom);
let out = fork-split(fission.srad_0.fj_bottom);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
let top = outline(fission.srad_0.fj_top);
let bottom = outline(out.srad_0.fj0);
gpu(top, bottom);
ip-sroa(*);
sroa(*);
simpl!(*);
gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment