// simpl!(X): cleanup pipeline applied to the selection X.
// Runs each pass below exactly once, in the listed order. The rest of this
// schedule invokes it after almost every structural transformation.
// NOTE(review): pass meanings are inferred from their names only (ccp =
// constant propagation, gvn = value numbering, dce = dead code elimination)
// -- confirm against the scheduler's pass documentation.
macro simpl!(X) {
  ccp(X);
  simplify-cfg(X);
  lift-dc-math(X);
  gvn(X);
  phi-elim(X);
  dce(X);
  // Runs last in the sequence, after all of the simplification passes above.
  infer-schedules(X);
}

// Apply gpu() to five of the functions handled in this schedule.
// max_gradient is not listed here; the two pieces outlined from it are passed
// to gpu() separately later in the file.
// NOTE(review): presumably gpu() selects the GPU backend for these functions
// -- confirm.
gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);

// First cleanup over the `*` selection (presumably every function -- confirm).
simpl!(*);

// ip-sroa followed by sroa over `*`, then another cleanup round.
// NOTE(review): presumably interprocedural and per-function scalar replacement
// of aggregates -- confirm.
ip-sroa(*);
sroa(*);
simpl!(*);

// --- gaussian_smoothing ---
// no-memset is applied to the label `res` inside gaussian_smoothing.
// NOTE(review): presumably suppresses zero-initialisation of that value;
// confirm it is fully written before being read.
no-memset(gaussian_smoothing@res);
// Forkify, fork guard elimination and fork coalescing, wrapped in a fixpoint
// block (presumably repeated until the passes stop making changes -- confirm).
fixpoint {
  forkify(gaussian_smoothing);
  fork-guard-elim(gaussian_smoothing);
  fork-coalesce(gaussian_smoothing);
}
// Two rounds of predication, each followed by the simpl! cleanup.
// NOTE(review): presumably the first cleanup exposes further predication
// opportunities for the second round -- confirm the repetition is needed.
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);

// --- laplacian_estimate ---
// no-memset is applied to three labels in this function: res, shr1 and shr2.
// NOTE(review): presumably suppresses zero-initialisation of those values --
// confirm each is fully written before being read.
no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);
// Same forkify / guard-elim / coalesce fixpoint used for the other functions.
fixpoint {
  forkify(laplacian_estimate);
  fork-guard-elim(laplacian_estimate);
  fork-coalesce(laplacian_estimate);
}
// Cleanup only; unlike gaussian_smoothing and gradient, no predication pass is
// run on this function.
simpl!(laplacian_estimate);

// --- zero_crossings ---
// no-memset is applied to three labels in this function: res, shr1 and shr2.
// NOTE(review): presumably suppresses zero-initialisation of those values --
// confirm each is fully written before being read.
no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);
// Same forkify / guard-elim / coalesce fixpoint used for the other functions.
fixpoint {
  forkify(zero_crossings);
  fork-guard-elim(zero_crossings);
  fork-coalesce(zero_crossings);
}
// Cleanup only; no predication pass is run on this function.
simpl!(zero_crossings);

// --- gradient ---
// no-memset is applied to the label `res` inside gradient.
// NOTE(review): presumably suppresses zero-initialisation of that value --
// confirm it is fully written before being read.
no-memset(gradient@res);
// Same forkify / guard-elim / coalesce fixpoint used for the other functions.
fixpoint {
  forkify(gradient);
  fork-guard-elim(gradient);
  fork-coalesce(gradient);
}
// Two rounds of predication + cleanup, mirroring the gaussian_smoothing
// schedule.
predication(gradient);
simpl!(gradient);
predication(gradient);
simpl!(gradient);

// --- max_gradient ---
// This function is not passed to gpu() directly. It is forkified, tiled,
// split and fissioned, and the two resulting pieces are outlined into new
// functions that are then passed to gpu().
// No no-memset is applied to this function.
fixpoint {
  forkify(max_gradient);
  fork-guard-elim(max_gradient);
  fork-coalesce(max_gradient);
}
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
// First tiling step on the whole function.
// NOTE(review): arguments are [32, 0, false, true]; presumably a tile size of
// 32 on dimension 0 -- the meaning of the two booleans is not visible here.
fork-tile[32, 0, false, true](max_gradient);
// The result of fork-split is bound to `out`; its `_4_max_gradient.fj0` label
// is the argument to fork-fission below.
// NOTE(review): the `_4_` prefix looks like a generated name and may be
// sensitive to changes in the passes above -- confirm.
let out = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
// The fission result exposes `fj_top` and `fj_bottom` labels, both used below.
let fission = fork-fission[out._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
// Second tiling step, restricted to the bottom piece, with the same arguments
// as the first.
fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
// `out` is bound a second time here; the `out._4_max_gradient.fj0` used in the
// outline below follows this binding, not the earlier one.
let out = fork-split(fission._4_max_gradient.fj_bottom);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
// Outline the top piece and the re-split bottom piece, then pass both
// outlined results to gpu().
let top = outline(fission._4_max_gradient.fj_top);
let bottom = outline(out._4_max_gradient.fj0);
gpu(top, bottom);
// Same ip-sroa / sroa / simpl! sequence that was run over `*` near the start
// of the schedule, repeated now that the outlined functions exist.
ip-sroa(*);
sroa(*);
simpl!(*);

// --- reject_zero_crossings ---
// no-memset is applied to the label `res` inside reject_zero_crossings.
// NOTE(review): presumably suppresses zero-initialisation of that value --
// confirm it is fully written before being read.
no-memset(reject_zero_crossings@res);
// Same forkify / guard-elim / coalesce fixpoint used for the other functions.
fixpoint {
  forkify(reject_zero_crossings);
  fork-guard-elim(reject_zero_crossings);
  fork-coalesce(reject_zero_crossings);
}
// A single round of predication + cleanup (gaussian_smoothing and gradient
// get two rounds).
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);

// --- finalisation ---
// async-call is applied to the labels `le` and `zc` inside edge_detection.
// NOTE(review): presumably these label the laplacian_estimate and
// zero_crossings calls and this allows them to run asynchronously -- confirm
// against the edge_detection source.
async-call(edge_detection@le, edge_detection@zc);

// Final cleanup over `*`.
simpl!(*);

// Last two passes of the schedule: delete-uncalled, then gcm, both over `*`.
// NOTE(review): presumably delete-uncalled removes functions with no remaining
// callers and gcm is global code motion -- confirm.
delete-uncalled(*);
gcm(*);