// Pass schedule for the `bfs` function and the regions outlined from it.
// NOTE(review): the statements below are order-sensitive; the sequence is
// exactly the original one, only laid out one statement per line.

// Cleanup pipeline: runs the passes below, in this order, on the selection X.
macro simpl!(X) {
  ccp(X);
  simplify-cfg(X);
  lift-dc-math(X);
  gvn(X);
  phi-elim(X);
  dce(X);
  infer-schedules(X);
}

// Initial passes on `bfs` and on its `cost` label.
phi-elim(bfs);
no-memset(bfs@cost);

// Outline four labeled regions of `bfs` and bind each result to a name.
let init = outline(bfs@cost_init);
let loop1 = outline(bfs@loop1);
let loop2 = outline(bfs@loop2);
let loop3 = outline(bfs@loop3);

// Passes applied to `loop1` first, then to everything (`*`).
parallel-reduce(loop1);
simpl!(*);
predication(*);
const-inline(*);
loop-bound-canon(*);
simpl!(*);

// Repeat forkify + fork-guard-elim on everything until a fixpoint is reached.
fixpoint {
  forkify(*);
  fork-guard-elim(*);
}
simpl!(*);
predication(*);
simpl!(*);
reduce-slf(*);
simpl!(*);

// loop1: tile, split, then apply the `gpu` pass.
fork-tile[32, 0, false, true](loop1);
fork-split(loop1);
gpu(loop1);

// loop2 and loop3: forkify to a fixpoint, clean up, tile and split together.
fixpoint {
  forkify(loop2, loop3);
  fork-guard-elim(loop2, loop3);
}
simpl!(loop2, loop3);
fork-tile[32, 0, false, true](loop2, loop3);
let split_all = fork-split(loop2, loop3);
clean-monoid-reduces(loop2, loop3);
simpl!(loop2, loop3);
gpu(loop3);

// loop2, first fission: fission at the `fj0` entry recorded by the split above,
// then tile and split the resulting `fj_bottom` part.
let fission1 = fork-fission[split_all.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
let split_bottom1 = fork-split(fission1.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);

// loop2, second fission: same sequence, starting from the previous split.
let fission2 = fork-fission[split_bottom1.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
let split_bottom2 = fork-split(fission2.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);

// Outline the three pieces of loop2 produced above, then run the remaining
// passes on them (and on loop2 where listed).
let top = outline(fission1.bfs_2.fj_top);
let middle = outline(fission2.bfs_2.fj_top);
let bottom = outline(split_bottom2.bfs_2.fj0);
const-inline(loop2, top, middle, bottom);
no-memset(top, middle);
gpu(top, middle, bottom);
simpl!(loop2, top, middle, bottom);

// Final passes: unforkify the outlined `init` region, then gcm on everything.
unforkify(init);
gcm(*);