Newer
Older
// simpl!(X): cleanup sequence applied to the selection X.
// Runs, in this order: ccp, simplify-cfg, lift-dc-math, gvn, phi-elim, dce,
// infer-schedules. The schedule below invokes it after most transformation
// steps, so the order of the passes here should not be changed casually.
macro simpl!(X) {
ccp(X);
simplify-cfg(X);
lift-dc-math(X);
gvn(X);
phi-elim(X);
dce(X);
// Last step of every cleanup round.
// NOTE(review): presumably re-derives schedule annotations after the IR changed - confirm.
infer-schedules(X);
}
// Outline the labeled regions of srad: loop1 is bound to sum_loop, and the
// combination of loop2 and loop3 is bound to main_loops.
let sum_loop = outline(srad@loop1);
let main_loops = outline(srad@loop2 | srad@loop3);
// Hand main_loops, extract and compress to the gpu pass.
// NOTE(review): extract and compress are not bound in this schedule; presumably
// they are functions of the program being scheduled - confirm.
gpu(main_loops, extract, compress);
simpl!(*);
// NOTE(review): the meaning of the [true] flag is not visible here; a later
// const-inline call in this schedule uses [false] - confirm the intent.
const-inline[true](*);
write-predication(*);
simpl!(*);
// predication + simpl! is applied twice in a row.
// NOTE(review): presumably the second round picks up cases exposed by the
// first - confirm.
predication(*);
simpl!(*);
predication(*);
simpl!(*);
// Fork construction: forkify, fork-guard-elim and fork-coalesce are grouped in
// a fixpoint block (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(*);
fork-guard-elim(*);
fork-coalesce(*);
}
simpl!(*);
// Three slf variants (reduce-slf, array-slf, slf), each followed by a cleanup
// round over the whole program.
reduce-slf(*);
simpl!(*);
array-slf(*);
simpl!(*);
slf(*);
simpl!(*);
// ---- sum_loop: three rounds of tile / split / fission ----
// Round 1: merge fork dimensions, tile, split, then clean up reductions.
// NOTE(review): the fork-tile arguments [32, 0, false, true] are not explained
// here; 32 is presumably the tile size - confirm.
fork-dim-merge(sum_loop);
simpl!(sum_loop);
fork-tile[32, 0, false, true](sum_loop);
// `out` is rebound by each fork-split below; every use refers to the most
// recent binding.
let out = fork-split(sum_loop);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// Fission at the fj0 fork produced by the split above; the result exposes
// fj_top and fj_bottom, which are used below.
let fission1 = fork-fission[out.srad_0.fj0](sum_loop);
simpl!(sum_loop);
// Round 2: same tile / split recipe, applied to the bottom part of fission1.
fork-tile[32, 0, false, true](fission1.srad_0.fj_bottom);
let out = fork-split(fission1.srad_0.fj_bottom);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// NOTE(review): unlike after fission1, there is no simpl!(sum_loop) between
// this fission and the next fork-tile - confirm this is intentional.
let fission2 = fork-fission[out.srad_0.fj0](sum_loop);
// Round 3: same tile / split recipe, applied to the bottom part of fission2.
fork-tile[32, 0, false, true](fission2.srad_0.fj_bottom);
let out = fork-split(fission2.srad_0.fj_bottom);
clean-monoid-reduces(sum_loop);
simpl!(sum_loop);
// Outline the three resulting pieces and pass them to the gpu pass:
// the top of each fission, plus fj0 from the last fork-split.
let first = outline(fission1.srad_0.fj_top);
let second = outline(fission2.srad_0.fj_top);
let third = outline(out.srad_0.fj0);
gpu(first, second, third);
// NOTE(review): const-inline is run here with [false]; the flag's meaning is
// not visible in this schedule - confirm.
const-inline[false](*);
// The same five-step recipe is applied in turn to main_loops, extract and
// compress: fork-dim-merge, fork-tile[32, 0, false, true], dce, fork-split,
// simpl!. The value returned by fork-split is not bound in this section.
// main_loops
fork-dim-merge(main_loops);
fork-tile[32, 0, false, true](main_loops);
dce(main_loops);
fork-split(main_loops);
simpl!(main_loops);
// extract
fork-dim-merge(extract);
fork-tile[32, 0, false, true](extract);
dce(extract);
fork-split(extract);
simpl!(extract);
// compress
fork-dim-merge(compress);
fork-tile[32, 0, false, true](compress);
dce(compress);
fork-split(compress);
simpl!(compress);