// gpu.sch (4.32 KiB) - authored by xavierrouth
// simpl!(X): shared cleanup pipeline used throughout this schedule.
// Applies the seven passes below to the selection X, in exactly this order.
// NOTE(review): going by the pass names this is constant propagation, CFG
// simplification, GVN, phi elimination and dead-code elimination, followed by
// schedule inference - confirm against the scheduler's pass documentation.
macro simpl!(X) {
ccp(X);
simplify-cfg(X);
lift-dc-math(X);
gvn(X);
phi-elim(X);
dce(X);
infer-schedules(X);
}
// Apply `gpu` to five of the pipeline stages up front. max_gradient is not in
// this list; it gets its own `gpu(...)` calls in the feature-gated section
// further down.
// NOTE(review): `gpu(...)` presumably selects the GPU backend for the named
// functions - confirm.
gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
// Whole-module (`*`) preparation: cleanup, then ip-sroa and sroa, then cleanup
// again before the per-function pipelines start.
simpl!(*);
ip-sroa(*);
sroa(*);
simpl!(*);
// ---- gaussian_smoothing ----
// Apply no-memset to the value labeled `res` in gaussian_smoothing.
no-memset(gaussian_smoothing@res);
// Run the forkify / fork-guard-elim / fork-coalesce trio inside a fixpoint
// block (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(gaussian_smoothing);
fork-guard-elim(gaussian_smoothing);
fork-coalesce(gaussian_smoothing);
}
// Two rounds of predication, each followed by the cleanup macro.
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
// Handle the code labeled `filter_loop` separately: merge its fork dimensions,
// then unforkify it, before touching the rest of the function.
fork-dim-merge(gaussian_smoothing@filter_loop);
unforkify(gaussian_smoothing@filter_loop);
simpl!(gaussian_smoothing);
// Whole-function fork shaping: merge dimensions, tile, split, with cleanup
// between steps.
// NOTE(review): the meaning of the fork-tile arguments [32, 0, false, true]
// is not visible here (32 is presumably the tile size) - confirm.
fork-dim-merge(gaussian_smoothing);
fork-tile[32, 0, false, true](gaussian_smoothing);
simpl!(gaussian_smoothing);
fork-split(gaussian_smoothing);
simpl!(gaussian_smoothing);
// ---- laplacian_estimate ----
// Same sequence as gaussian_smoothing, except that no predication rounds are
// run here: a single cleanup follows the fixpoint block.
no-memset(laplacian_estimate@res);
// forkify / fork-guard-elim / fork-coalesce inside a fixpoint block
// (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(laplacian_estimate);
fork-guard-elim(laplacian_estimate);
fork-coalesce(laplacian_estimate);
}
simpl!(laplacian_estimate);
// Merge and then unforkify the code labeled `filter_loop`.
fork-dim-merge(laplacian_estimate@filter_loop);
unforkify(laplacian_estimate@filter_loop);
simpl!(laplacian_estimate);
// Whole-function fork shaping: merge dimensions, tile, split.
// NOTE(review): fork-tile arguments [32, 0, false, true] are not explained in
// this file - confirm their meaning.
fork-dim-merge(laplacian_estimate);
fork-tile[32, 0, false, true](laplacian_estimate);
simpl!(laplacian_estimate);
fork-split(laplacian_estimate);
simpl!(laplacian_estimate);
// ---- zero_crossings ----
// Same pass sequence as laplacian_estimate (no predication rounds).
no-memset(zero_crossings@res);
// forkify / fork-guard-elim / fork-coalesce inside a fixpoint block
// (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(zero_crossings);
fork-guard-elim(zero_crossings);
fork-coalesce(zero_crossings);
}
simpl!(zero_crossings);
// Merge and then unforkify the code labeled `filter_loop`.
fork-dim-merge(zero_crossings@filter_loop);
unforkify(zero_crossings@filter_loop);
simpl!(zero_crossings);
// Whole-function fork shaping: merge dimensions, tile, split.
// NOTE(review): fork-tile arguments [32, 0, false, true] are not explained in
// this file - confirm their meaning.
fork-dim-merge(zero_crossings);
fork-tile[32, 0, false, true](zero_crossings);
simpl!(zero_crossings);
fork-split(zero_crossings);
simpl!(zero_crossings);
// ---- gradient ----
// Same pass sequence as gaussian_smoothing, including the two predication
// rounds.
no-memset(gradient@res);
// forkify / fork-guard-elim / fork-coalesce inside a fixpoint block
// (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(gradient);
fork-guard-elim(gradient);
fork-coalesce(gradient);
}
// Two rounds of predication, each followed by the cleanup macro.
predication(gradient);
simpl!(gradient);
predication(gradient);
simpl!(gradient);
// Merge and then unforkify the code labeled `filter_loop`.
fork-dim-merge(gradient@filter_loop);
unforkify(gradient@filter_loop);
simpl!(gradient);
// Whole-function fork shaping: merge dimensions, tile, split.
// NOTE(review): fork-tile arguments [32, 0, false, true] are not explained in
// this file - confirm their meaning.
fork-dim-merge(gradient);
fork-tile[32, 0, false, true](gradient);
simpl!(gradient);
fork-split(gradient);
simpl!(gradient);
// ---- max_gradient ----
// Unlike the other stages, max_gradient has no no-memset call, and the rest of
// its pipeline is chosen by the "seq" and "warp_tile" features.
// NOTE(review): `feature("...")` presumably tests a build-time feature flag -
// confirm where these are set.
fixpoint {
forkify(max_gradient);
fork-guard-elim(max_gradient);
fork-coalesce(max_gradient);
}
if !feature("seq") {
// "seq" disabled: tile and split the fork, fission it, then tile and split the
// resulting `fj_bottom` piece again, and finally outline two pieces and pass
// them to `gpu`.
if !feature("warp_tile") {
// "warp_tile" disabled: this variant additionally unforkifies the `fj1` result
// of both fork-split calls.
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, true](max_gradient);
let out1 = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
// Fission is applied at the `fj0` result of the first split.
let fission = fork-fission[out1._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
// Second tile + split, restricted to the `fj_bottom` piece from fission.
fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
let out2 = fork-split(fission._4_max_gradient.fj_bottom);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
unforkify(out1._4_max_gradient.fj1);
unforkify(out2._4_max_gradient.fj1);
simpl!(max_gradient);
// Outline the fission `fj_top` piece and the `fj0` result of the second split,
// then hand both outlined pieces to `gpu`.
let top = outline(fission._4_max_gradient.fj_top);
let bottom = outline(out2._4_max_gradient.fj0);
gpu(top, bottom);
} else {
// "warp_tile" enabled: same steps as the branch above, but without the two
// `unforkify(... .fj1)` calls and the cleanup that follows them.
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, true](max_gradient);
let out = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
// Reads the first binding of `out` (the split of the whole function).
let fission = fork-fission[out._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
// `out` is bound a second time here; the outline call below reads this later
// binding. NOTE(review): this relies on `let` rebinding shadowing the earlier
// value - confirm.
let out = fork-split(fission._4_max_gradient.fj_bottom);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
let top = outline(fission._4_max_gradient.fj_top);
let bottom = outline(out._4_max_gradient.fj0);
gpu(top, bottom);
}
} else {
// "seq" enabled: no tiling, fission or outlining. Clean up, split, unforkify
// the whole function, and pass max_gradient itself to `gpu`.
simpl!(max_gradient);
fork-split(max_gradient);
unforkify(max_gradient);
gpu(max_gradient);
}
// Second whole-module ip-sroa / sroa round plus cleanup, run after the
// max_gradient section (which may have outlined new functions).
ip-sroa(*);
sroa(*);
simpl!(*);
// ---- reject_zero_crossings ----
// Apply no-memset to the value labeled `res`.
no-memset(reject_zero_crossings@res);
// forkify / fork-guard-elim / fork-coalesce inside a fixpoint block
// (presumably repeated until nothing changes - confirm).
fixpoint {
forkify(reject_zero_crossings);
fork-guard-elim(reject_zero_crossings);
fork-coalesce(reject_zero_crossings);
}
// A single predication round; unlike the earlier stages there is no
// `filter_loop` handling here.
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);
// Whole-function fork shaping: merge dimensions, tile, split.
// NOTE(review): fork-tile arguments [32, 0, false, true] are not explained in
// this file - confirm their meaning.
fork-dim-merge(reject_zero_crossings);
fork-tile[32, 0, false, true](reject_zero_crossings);
simpl!(reject_zero_crossings);
fork-split(reject_zero_crossings);
simpl!(reject_zero_crossings);
// ---- finalization ----
// Apply async-call to the `le` and `zc` labels inside edge_detection.
// NOTE(review): these labels presumably mark the laplacian_estimate and
// zero_crossings call sites - confirm against the edge_detection source.
async-call(edge_detection@le, edge_detection@zc);
// Final whole-module cleanup, then delete-uncalled and gcm over everything.
simpl!(*);
delete-uncalled(*);
gcm(*);