cpu.sch 1.23 KiB
// simpl!(FUNCS): reusable cleanup sequence, invoked between the larger
// transformation steps of this schedule. Applies the following passes to the
// given selection, strictly in this order:
//   ccp, simplify-cfg, lift-dc-math, gvn, phi-elim, dce, infer-schedules
macro simpl!(FUNCS) {
  ccp(FUNCS);
  simplify-cfg(FUNCS);
  lift-dc-math(FUNCS);
  gvn(FUNCS);
  phi-elim(FUNCS);
  dce(FUNCS);
  infer-schedules(FUNCS);
}
// Schedule for the `bfs` function. Statement order is significant: each pass
// runs on the result of the previous ones, and several names are rebound
// along the way.

// Initial preparation of `bfs`, then outline three labeled regions
// (cost_init, loop1, loop2) and bind the results to `init`, `traverse`
// and `collect` for use by the later, targeted passes.
phi-elim(bfs);
no-memset(bfs@cost);
let init = outline(bfs@cost_init);
let traverse = outline(bfs@loop1);
let collect = outline(bfs@loop2);
// Whole-program (`*`) phase: cleanup via simpl! interleaved with predication,
// constant inlining and loop-bound canonicalization.
simpl!(*);
predication(*);
const-inline(*);
loop-bound-canon(*);
simpl!(*);
// Repeat forkify + fork-guard-elim over everything inside a fixpoint block.
// NOTE(review): presumably iterates until neither pass changes the IR - confirm.
fixpoint {
forkify(*);
fork-guard-elim(*);
}
simpl!(*);
predication(*);
simpl!(*);
reduce-slf(*);
simpl!(*);
slf(*);
simpl!(*);
// Second forkify fixpoint, restricted to `collect` only, followed by a
// cleanup of just that selection.
fixpoint {
forkify(collect);
fork-guard-elim(collect);
}
simpl!(collect);
// Apply the parallel-fork / parallel-reduce passes to the two loop regions
// (not to `init`).
parallel-fork(traverse, collect);
parallel-reduce(traverse, collect);
// Everything in this block is skipped when the "seq" feature is enabled.
if !feature("seq") {
// Tile, reshape, and outline the `inner` result for traverse and collect;
// each name is rebound to its newly outlined inner part.
// NOTE(review): meaning of the bracketed arguments ([32, 0, false, false],
// [[0], [1]]) is not visible in this file - confirm against the pass docs.
fork-tile[32, 0, false, false](traverse, collect);
let (outer, inner) = fork-reshape[[0], [1]](traverse);
traverse = outline(inner);
// `outer` / `inner` are re-declared here and again below, replacing the
// previous bindings.
let (outer, inner) = fork-reshape[[0], [1]](collect);
collect = outline(inner);
// Same treatment for init, but the outlined inner part is held in a
// temporary (`init_body`) until after the inline call below.
fork-tile[32, 0, false, false](init);
let (outer, inner) = fork-reshape[[0], [1]](init);
let init_body = outline(inner);
// Inline the three originally labeled regions, then point `init` at the
// outlined inner body so later passes target it.
inline(bfs@cost_init, bfs@loop1, bfs@loop2);
init = init_body;
}
// Runs in both configurations; in the non-"seq" case the three names refer to
// the rebound selections from the block above.
// NOTE(review): note the last bracketed argument is `true` here, unlike the
// tiling calls above - confirm intended.
fork-tile[8, 0, false, true](init, traverse, collect);
delete-uncalled(*);
const-inline(*);
simpl!(*);
// Final phase: fork-split then unforkify on the three selections, one more
// global cleanup, and gcm over everything as the last pass.
fork-split(init, traverse, collect);
unforkify(init, traverse, collect);
simpl!(*);
gcm(*);