// gpu.sch
// simpl!(X): cleanup pipeline applied to the selection X.
// Runs the seven passes below in the order written; this file invokes it
// after nearly every transformation step.
// NOTE(review): pass order is assumed to be significant — do not reorder
// without checking each pass's preconditions.
macro simpl!(X) {
  ccp(X);
  simplify-cfg(X);
  lift-dc-math(X);
  gvn(X);
  phi-elim(X);
  dce(X);
  // Always last: presumably re-derives schedule annotations after the
  // cleanup above — TODO confirm.
  infer-schedules(X);
}

// Hand five stage functions to gpu(...); presumably this selects them for
// GPU code generation — TODO confirm. max_gradient is not in this list; it
// (or pieces outlined from it) is passed to gpu(...) later in this file.
gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);

// Initial cleanup over the `*` selection (presumably every function).
simpl!(*);

// ip-sroa followed by sroa over `*`, then another cleanup round.
// NOTE(review): presumably interprocedural and per-function scalar
// replacement of aggregates — confirm against the pass documentation.
ip-sroa(*);
sroa(*);
simpl!(*);

// ---- gaussian_smoothing ----
// Apply no-memset to the `res` label inside gaussian_smoothing
// (presumably suppresses zero-initialisation of that value — TODO confirm).
no-memset(gaussian_smoothing@res);
// Iterate forkify / fork-guard-elim / fork-coalesce as a group; `fixpoint`
// presumably repeats the body until nothing changes.
fixpoint {
  forkify(gaussian_smoothing);
  fork-guard-elim(gaussian_smoothing);
  fork-coalesce(gaussian_smoothing);
}
// predication + cleanup is run twice in a row for this function
// (laplacian_estimate and zero_crossings skip predication entirely).
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
// Handle the part labelled `filter_loop` on its own: merge its fork
// dimensions, then unforkify it, then clean up.
fork-dim-merge(gaussian_smoothing@filter_loop);
unforkify(gaussian_smoothing@filter_loop);
simpl!(gaussian_smoothing);

// Whole-function fork shaping: merge dimensions, tile, split.
fork-dim-merge(gaussian_smoothing);
// NOTE(review): 32 is presumably the tile size and 0 the dimension index;
// the meaning of the two booleans is not visible here — confirm against
// fork-tile's parameter list.
fork-tile[32, 0, false, true](gaussian_smoothing);
simpl!(gaussian_smoothing);
fork-split(gaussian_smoothing);
simpl!(gaussian_smoothing);

// ---- laplacian_estimate ----
// Same sequence as gaussian_smoothing, except that no predication passes
// are run between the fixpoint and the filter_loop handling.
no-memset(laplacian_estimate@res);
// Iterate forkify / fork-guard-elim / fork-coalesce as a group; `fixpoint`
// presumably repeats the body until nothing changes.
fixpoint {
  forkify(laplacian_estimate);
  fork-guard-elim(laplacian_estimate);
  fork-coalesce(laplacian_estimate);
}
simpl!(laplacian_estimate);
// Handle the part labelled `filter_loop` on its own: merge its fork
// dimensions, then unforkify it, then clean up.
fork-dim-merge(laplacian_estimate@filter_loop);
unforkify(laplacian_estimate@filter_loop);
simpl!(laplacian_estimate);

// Whole-function fork shaping: merge dimensions, tile, split.
fork-dim-merge(laplacian_estimate);
// NOTE(review): 32 is presumably the tile size — confirm against
// fork-tile's parameter list.
fork-tile[32, 0, false, true](laplacian_estimate);
simpl!(laplacian_estimate);
fork-split(laplacian_estimate);
simpl!(laplacian_estimate);

// ---- zero_crossings ----
// Mirrors the laplacian_estimate sequence step for step (no predication).
no-memset(zero_crossings@res);
// Iterate forkify / fork-guard-elim / fork-coalesce as a group; `fixpoint`
// presumably repeats the body until nothing changes.
fixpoint {
  forkify(zero_crossings);
  fork-guard-elim(zero_crossings);
  fork-coalesce(zero_crossings);
}
simpl!(zero_crossings);
// Handle the part labelled `filter_loop` on its own: merge its fork
// dimensions, then unforkify it, then clean up.
fork-dim-merge(zero_crossings@filter_loop);
unforkify(zero_crossings@filter_loop);
simpl!(zero_crossings);

// Whole-function fork shaping: merge dimensions, tile, split.
fork-dim-merge(zero_crossings);
// NOTE(review): 32 is presumably the tile size — confirm against
// fork-tile's parameter list.
fork-tile[32, 0, false, true](zero_crossings);
simpl!(zero_crossings);
fork-split(zero_crossings);
simpl!(zero_crossings);

// ---- gradient ----
// Mirrors the gaussian_smoothing sequence, including the doubled
// predication + cleanup step.
no-memset(gradient@res);
// Iterate forkify / fork-guard-elim / fork-coalesce as a group; `fixpoint`
// presumably repeats the body until nothing changes.
fixpoint {
  forkify(gradient);
  fork-guard-elim(gradient);
  fork-coalesce(gradient);
}
predication(gradient);
simpl!(gradient);
predication(gradient);
simpl!(gradient);
// Handle the part labelled `filter_loop` on its own: merge its fork
// dimensions, then unforkify it, then clean up.
fork-dim-merge(gradient@filter_loop);
unforkify(gradient@filter_loop);
simpl!(gradient);

// Whole-function fork shaping: merge dimensions, tile, split.
fork-dim-merge(gradient);
// NOTE(review): 32 is presumably the tile size — confirm against
// fork-tile's parameter list.
fork-tile[32, 0, false, true](gradient);
simpl!(gradient);
fork-split(gradient);
simpl!(gradient);

// ---- max_gradient ----
// Unlike the other stages there is no no-memset step, and the schedule
// after forkification depends on the "seq" and "warp_tile" features.
fixpoint {
  forkify(max_gradient);
  fork-guard-elim(max_gradient);
  fork-coalesce(max_gradient);
}

if !feature("seq") {
  // Non-sequential schedule: tile and split twice with a fork-fission in
  // between, then outline two pieces and hand both to gpu(...).
  // The two branches below are identical except that the first one also
  // unforkifies the `fj1` forks from both splits (plus one extra cleanup)
  // before outlining.
  if !feature("warp_tile") {
    simpl!(max_gradient);
    fork-dim-merge(max_gradient);
    simpl!(max_gradient);
    // First tile + split; out1 keeps the labels returned by fork-split.
    fork-tile[32, 0, false, true](max_gradient);
    let out1 = fork-split(max_gradient);
    clean-monoid-reduces(max_gradient);
    simpl!(max_gradient);
    // Fission the fj0 fork from the first split; the result exposes
    // fj_top and fj_bottom, both used below.
    // NOTE(review): `_4_max_gradient` looks like a generated name —
    // confirm it stays stable if the function's source changes.
    let fission = fork-fission[out1._4_max_gradient.fj0](max_gradient);
    simpl!(max_gradient);
    // Second tile + split, applied only to the bottom half of the fission.
    fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
    let out2 = fork-split(fission._4_max_gradient.fj_bottom);
    clean-monoid-reduces(max_gradient);
    simpl!(max_gradient);
    // Only in this branch: unforkify the fj1 fork of each split.
    unforkify(out1._4_max_gradient.fj1);

    unforkify(out2._4_max_gradient.fj1);
    simpl!(max_gradient);
    // Outline the fission's top half and the second split's fj0, then
    // pass both outlined functions to gpu(...).
    let top = outline(fission._4_max_gradient.fj_top);
    let bottom = outline(out2._4_max_gradient.fj0);
    gpu(top, bottom);
  } else {
    // "warp_tile" variant: same steps as above, but the fj1 forks are left
    // as forks (no unforkify).
    simpl!(max_gradient);
    fork-dim-merge(max_gradient);
    simpl!(max_gradient);
    fork-tile[32, 0, false, true](max_gradient);
    let out = fork-split(max_gradient);
    clean-monoid-reduces(max_gradient);
    simpl!(max_gradient);
    let fission = fork-fission[out._4_max_gradient.fj0](max_gradient);
    simpl!(max_gradient);
    fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
    // `out` is bound a second time here; from this point on it refers to
    // the split of fj_bottom, not the first split.
    let out = fork-split(fission._4_max_gradient.fj_bottom);
    clean-monoid-reduces(max_gradient);
    simpl!(max_gradient);
    let top = outline(fission._4_max_gradient.fj_top);
    let bottom = outline(out._4_max_gradient.fj0);
    gpu(top, bottom);
  }
} else {
  // "seq" variant: no tiling or fission. Split, unforkify the whole
  // function, and pass max_gradient itself to gpu(...).
  simpl!(max_gradient);
  fork-split(max_gradient);
  unforkify(max_gradient);
  gpu(max_gradient);
}

// Second ip-sroa / sroa / cleanup round over `*`, repeating the sequence
// used near the top of the file now that max_gradient has been restructured.
ip-sroa(*);
sroa(*);
simpl!(*);

// ---- reject_zero_crossings ----
// Differs from the other stages: a single predication pass and no separate
// `filter_loop` handling.
no-memset(reject_zero_crossings@res);
// Iterate forkify / fork-guard-elim / fork-coalesce as a group; `fixpoint`
// presumably repeats the body until nothing changes.
fixpoint {
  forkify(reject_zero_crossings);
  fork-guard-elim(reject_zero_crossings);
  fork-coalesce(reject_zero_crossings);
}
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);

// Whole-function fork shaping: merge dimensions, tile, split.
fork-dim-merge(reject_zero_crossings);
// NOTE(review): 32 is presumably the tile size — confirm against
// fork-tile's parameter list.
fork-tile[32, 0, false, true](reject_zero_crossings);
simpl!(reject_zero_crossings);
fork-split(reject_zero_crossings);
simpl!(reject_zero_crossings);

// Apply async-call to the `le` and `zc` labels inside edge_detection.
// NOTE(review): presumably these label the laplacian_estimate and
// zero_crossings call sites, letting them run asynchronously — confirm
// against the edge_detection source.
async-call(edge_detection@le, edge_detection@zc);

// Final cleanup over `*`.
simpl!(*);

// Closing passes over `*`: delete-uncalled, then gcm.
// NOTE(review): gcm is presumably global code motion and is expected to
// run last — confirm.
delete-uncalled(*);
gcm(*);