Skip to content
Snippets Groups Projects

More optimization

Merged rarbore2 requested to merge miranda_opts2 into main
1 file
+ 29
- 6
Compare changes
  • Side-by-side
  • Inline
@@ -41,15 +41,26 @@ fork-tile[32, 0, false, true](sum_loop);
// NOTE(review): this is the body of a diff hunk whose +/- markers were lost
// in capture, so lines from the old and the new version of the schedule
// appear interleaved below. Read it as a diff, not as a runnable script.

// fork-split on sum_loop, result bound to `out`, followed by cleanup passes.
let out = fork-split(sum_loop);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// The next two lines differ only in the bound name (`fission` vs `fission1`);
// presumably the first is the removed side and the second its replacement
// -- confirm against the original diff.
let fission = fork-fission[out.srad_0.fj0](sum_loop);
let fission1 = fork-fission[out.srad_0.fj0](sum_loop);
simpl!(sum_loop);
// fork-tile then fork-split on `fission1.srad_0.fj_bottom`; this rebinds `out`.
fork-tile[32, 0, false, true](fission1.srad_0.fj_bottom);
let out = fork-split(fission1.srad_0.fj_bottom);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// Second fork-fission, keyed on `fj0` of the `out` bound just above.
let fission2 = fork-fission[out.srad_0.fj0](sum_loop);
simpl!(sum_loop);
// The same fork-tile / fork-split pair appears twice: once on `fission`
// and once on `fission2`. Presumably the `fission` pair is the removed side
// (it goes with the single-fission naming) -- confirm.
fork-tile[32, 0, false, true](fission.srad_0.fj_bottom);
let out = fork-split(fission.srad_0.fj_bottom);
fork-tile[32, 0, false, true](fission2.srad_0.fj_bottom);
let out = fork-split(fission2.srad_0.fj_bottom);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// Two outlined pieces (`top`, `bottom`) handed to gpu(...); these reference
// `fission`, so presumably they belong to the old side -- confirm.
let top = outline(fission.srad_0.fj_top);
let bottom = outline(out.srad_0.fj0);
gpu(top, bottom);
// Three outlined pieces handed to gpu(...): the `fj_top` of each fission
// result plus `fj0` of the most recent `out`.
let first = outline(fission1.srad_0.fj_top);
let second = outline(fission2.srad_0.fj_top);
let third = outline(out.srad_0.fj0);
gpu(first, second, third);
// Passes applied to `*` (presumably "everything" -- confirm selector semantics).
const-inline[false](*);
ip-sroa(*);
sroa(*);
simpl!(*);
@@ -60,4 +71,16 @@ dce(main_loops);
// NOTE(review): body of a second diff hunk; +/- markers were lost in capture
// and the hunk appears truncated, so added and context lines are not
// distinguishable here -- confirm against the original diff.
fork-split(main_loops);
simpl!(main_loops);
// `extract` receives, in order: fork-dim-merge, fork-tile[32, 0, false, true],
// dce, fork-split, simpl!.
fork-dim-merge(extract);
fork-tile[32, 0, false, true](extract);
dce(extract);
fork-split(extract);
simpl!(extract);
// `compress` receives the identical pass sequence with the same fork-tile arguments.
fork-dim-merge(compress);
fork-tile[32, 0, false, true](compress);
dce(compress);
fork-split(compress);
simpl!(compress);
// Final pass in the visible part of the hunk, applied to `*`.
gcm(*);
Loading