Skip to content
Snippets Groups Projects

More optimizations

Merged rarbore2 requested to merge more_opt3 into main
1 file
+ 41
9
Compare changes
  • Side-by-side
  • Inline
@@ -10,14 +10,16 @@ macro simpl!(X) {
// First diff hunk of the `bfs` schedule: passes applied to `bfs` itself, then
// regions selected with `bfs@<name>` are outlined and bound to names that the
// later steps refer to.
// NOTE(review): this text comes from a diff view whose +/- markers are not
// visible, so adjacent near-duplicate lines may be the old and new version of
// a single statement — confirm against the actual file before relying on it.
phi-elim(bfs);
no-memset(bfs@cost);
// NOTE(review): `cost_init` and `init` are both bound from the same selector
// `bfs@cost_init`; presumably only one of these two lines exists in the final
// file — verify.
let cost_init = outline(bfs@cost_init);
let init = outline(bfs@cost_init);
let loop1 = outline(bfs@loop1);
let loop2 = outline(bfs@loop2);
// NOTE(review): `gpu` is applied again to `loop1` and to `loop3` further down
// in this diff; this combined call may be the pre-change line — confirm.
gpu(loop1, loop2);
let loop3 = outline(bfs@loop3);
parallel-reduce(loop1);
// Passes over every function (`*`), bracketed by the `simpl!` macro.
simpl!(*);
predication(*);
const-inline(*);
loop-bound-canon(*);
simpl!(*);
fixpoint {
forkify(*);
@@ -26,14 +28,44 @@ fixpoint {
// Second diff hunk: whole-program cleanup, then preparation of `loop1`.
// NOTE(review): the hunk header names `fixpoint {` as context, so the first
// lines here may still sit inside an enclosing fixpoint block that is not
// fully visible — confirm against the actual file.
simpl!(*);
predication(*);
simpl!(*);
// NOTE(review): `unforkify(init)` appears near the end of this diff; this
// `cost_init` variant may be the pre-change line. Likewise `parallel-reduce`
// on `loop1` also appears in the first hunk. Diff markers are not visible.
unforkify(cost_init);
parallel-reduce(loop1);
// Forkify everything, remove fork guards, then simplify and clean up.
forkify(*);
fork-guard-elim(*);
simpl!(*);
predication(*);
reduce-slf(*);
simpl!(*);
// `loop1` only: tile, split, and mark for the GPU.
// NOTE(review): the meaning of the bracketed arguments [32, 0, false, true]
// is not visible here — see the scheduler's `fork-tile` definition.
fork-tile[32, 0, false, true](loop1);
fork-split(loop1);
gpu(loop1);
// Run forkify followed by fork-guard-elim on `loop2` and `loop3` inside a
// `fixpoint` block.
// NOTE(review): presumably the block repeats until neither pass changes
// anything — confirm against the scheduler's `fixpoint` semantics.
fixpoint {
forkify(loop2, loop3);
fork-guard-elim(loop2, loop3);
}
// `loop2` and `loop3` together: simplify, tile with the same bracketed
// arguments used for `loop1`, split, and clean up the resulting reduces.
simpl!(loop2, loop3);
fork-tile[32, 0, false, true](loop2, loop3);
// The result of this fork-split is kept in `out`; the first fork-fission on
// `loop2` below reads `out.bfs_2.fj0` from it.
let out = fork-split(loop2, loop3);
clean-monoid-reduces(loop2, loop3);
simpl!(loop2, loop3);
// Only `loop3` is marked for the GPU here; `loop2` is processed further
// before any `gpu` call touches its pieces.
gpu(loop3);
// Two fission rounds on `loop2`, both with the same shape:
//   1. fork-fission at `out.bfs_2.fj0`, where `out` is the most recent
//      fork-split result (it is rebound in each round),
//   2. simpl!,
//   3. fork-tile + fork-split on the `fj_bottom` piece of that fission,
//   4. clean-monoid-reduces + simpl! on `loop2`.
// NOTE(review): the names `bfs_2`, `fj0`, `fj_top`, `fj_bottom` are fields of
// pass results; what each one denotes is not visible here — confirm.

// Round 1: `out` is the fork-split result bound before this point.
let fission1 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
let out = fork-split(fission1.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
// Round 2: `out` now refers to the fork-split result from round 1.
let fission2 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
let out = fork-split(fission2.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
// Outline three pieces produced by the fission rounds on `loop2`:
//   top    <- `fj_top` of the first fission,
//   middle <- `fj_top` of the second fission,
//   bottom <- `fj0` of the most recent fork-split result `out`.
let top = outline(fission1.bfs_2.fj_top);
let middle = outline(fission2.bfs_2.fj_top);
let bottom = outline(out.bfs_2.fj0);
const-inline(loop2, top, middle, bottom);
// no-memset is applied to `top` and `middle` only, not to `bottom`.
no-memset(top, middle);
// All three outlined pieces are marked for the GPU; `loop2` itself is not
// passed to `gpu` here.
gpu(top, middle, bottom);
simpl!(loop2, top, middle, bottom);
// Final steps shown in this hunk: unforkify the `init` handle, then run gcm
// over everything (`*`).
// NOTE(review): `init` must be bound earlier in the schedule by a
// `let init = outline(...)` statement — confirm it exists in the final file.
unforkify(init);
gcm(*);
Loading