// simpl!(X): applies the seven passes below to the target X, one after the
// other, in exactly this order. The macro is invoked repeatedly later in this
// file, after each larger transformation step.
// NOTE(review): judging by the pass names this is a general cleanup sequence
// (constant propagation, CFG simplification, value numbering, phi
// elimination, dead-code elimination, schedule inference) -- confirm against
// the pass documentation; the pass semantics are not visible in this file.
macro simpl!(X) {
  ccp(X);
  simplify-cfg(X);
  lift-dc-math(X);
  gvn(X);
  phi-elim(X);
  dce(X);
  infer-schedules(X);
}

// Setup for `bfs`: run phi-elim on it, apply no-memset to `bfs@cost`, then
// outline three regions (`bfs@cost_init`, `bfs@loop1`, `bfs@loop2`) and bind
// the results to names that later statements refer to.
// NOTE(review): `bfs@name` presumably selects a labeled region inside `bfs`
// -- confirm against the source being scheduled.
phi-elim(bfs);
no-memset(bfs@cost);
let cost_init = outline(bfs@cost_init);
let loop1 = outline(bfs@loop1);
let loop2 = outline(bfs@loop2);
// Only loop1 and loop2 are passed to gpu(); cost_init is not.
// NOTE(review): confirm that leaving cost_init out is intentional.
gpu(loop1, loop2);

// Whole-target pipeline: every statement here is applied to `*`.
// NOTE(review): `*` presumably selects everything in the module -- confirm.
// The simpl! macro is re-run after each transformation step.
simpl!(*);
predication(*);
const-inline(*);
simpl!(*);
// forkify and fork-guard-elim are grouped in a fixpoint block.
// NOTE(review): presumably the pair is repeated until neither pass changes
// anything -- confirm the fixpoint semantics.
fixpoint {
  forkify(*);
  fork-guard-elim(*);
}
simpl!(*);
predication(*);
simpl!(*);

// Region-specific steps on the outlined bindings, followed by another
// whole-target round of forkify / fork-guard-elim / cleanup.
unforkify(cost_init);
parallel-reduce(loop1);
// NOTE(review): forkify(*) runs after unforkify(cost_init); if `*` includes
// cost_init this may act on it again -- confirm the intended interaction.
// Unlike the earlier occurrence, this forkify / fork-guard-elim pair is run
// once rather than inside a fixpoint block.
forkify(*);
fork-guard-elim(*);
simpl!(*);
predication(*);
reduce-slf(*);
simpl!(*);

// Last statement in the visible schedule: gcm applied to `*`.
// NOTE(review): presumably global code motion, expected to run after all
// other passes -- confirm against the pass documentation.
gcm(*);