From 91f3b55cb2d0e66301508d312d0c487cf295a81a Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 20 Feb 2025 10:52:31 -0600
Subject: [PATCH] slow GPU schedule

---
 juno_samples/edge_detection/src/gpu.sch | 85 +++++++++++++++++++++----
 1 file changed, 71 insertions(+), 14 deletions(-)

diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch
index 1e51efb9..3da40fd3 100644
--- a/juno_samples/edge_detection/src/gpu.sch
+++ b/juno_samples/edge_detection/src/gpu.sch
@@ -1,22 +1,79 @@
-gvn(*);
-phi-elim(*);
-dce(*);
+macro simpl!(X) {  // reusable cleanup pipeline: runs the passes below, in this order, on selection X
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);  // gvn / phi-elim / dce: the same three-pass sequence the old schedule ran on (*) directly
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);  // formerly a standalone infer-schedules(*) step; now re-run by every simpl! call
+}
 
-gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
+simpl!(*);  // initial cleanup on the `*` selection; the gpu(...) call is no longer issued here but near the end of the schedule
 
 ip-sroa(*);
 sroa(*);
-dce(*);
-gvn(*);
-phi-elim(*);
-dce(*);
+simpl!(*);  // cleanup after ip-sroa/sroa; replaces the hand-written dce/gvn/phi-elim/dce sequence
 
-//forkify(*);
-infer-schedules(*);
+no-memset(gaussian_smoothing@res);  // targets gaussian_smoothing@res (presumably a label inside that function; confirm)
+fixpoint {  // groups the three fork passes; presumably repeated until nothing changes (confirm fixpoint semantics)
+  forkify(gaussian_smoothing);
+  fork-guard-elim(gaussian_smoothing);
+  fork-coalesce(gaussian_smoothing);
+}
+predication(gaussian_smoothing);  // predication followed by simpl! is run twice for this function
+simpl!(gaussian_smoothing);
+predication(gaussian_smoothing);
+simpl!(gaussian_smoothing);
 
-gcm(*);
+no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);  // three targets here: res, shr1, shr2
+fixpoint {  // same forkify / fork-guard-elim / fork-coalesce group as used for the other functions
+  forkify(laplacian_estimate);
+  fork-guard-elim(laplacian_estimate);
+  fork-coalesce(laplacian_estimate);
+}
+simpl!(laplacian_estimate);  // cleanup only; no predication step is scheduled for this function
+
+no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);  // three targets here: res, shr1, shr2
+fixpoint {  // same forkify / fork-guard-elim / fork-coalesce group as used for the other functions
+  forkify(zero_crossings);
+  fork-guard-elim(zero_crossings);
+  fork-coalesce(zero_crossings);
+}
+simpl!(zero_crossings);  // cleanup only; no predication step is scheduled for this function
+
+no-memset(gradient@res);  // targets gradient@res (presumably a label inside that function; confirm)
+fixpoint {  // same forkify / fork-guard-elim / fork-coalesce group as used for the other functions
+  forkify(gradient);
+  fork-guard-elim(gradient);
+  fork-coalesce(gradient);
+}
+predication(gradient);  // predication followed by simpl! is run twice, as for gaussian_smoothing
+simpl!(gradient);
+predication(gradient);
+simpl!(gradient);
+
+fixpoint {  // note: max_gradient is the only function here without a preceding no-memset line
+  forkify(max_gradient);
+  fork-guard-elim(max_gradient);
+  fork-coalesce(max_gradient);
+}
+simpl!(max_gradient);  // cleanup only; no predication step is scheduled for this function
+
+no-memset(reject_zero_crossings@res);  // targets reject_zero_crossings@res (presumably a label inside that function; confirm)
 fixpoint {
-  float-collections(*);
-  dce(*);
-  gcm(*);
+  forkify(reject_zero_crossings);  // the retained fixpoint block now holds the fork passes instead of float-collections/dce/gcm
+  fork-guard-elim(reject_zero_crossings);
+  fork-coalesce(reject_zero_crossings);
 }
+predication(reject_zero_crossings);  // a single predication + simpl! round here (gaussian_smoothing and gradient get two)
+simpl!(reject_zero_crossings);
+
+async-call(edge_detection@le, edge_detection@zc);  // targets edge_detection@le and @zc (presumably the laplacian_estimate / zero_crossings call sites; confirm)
+
+gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);  // same six functions as before, but now issued after the per-function fork passes
+
+simpl!(*);  // final cleanup on the `*` selection after the gpu(...) step
+
+delete-uncalled(*);
+gcm(*);  // gcm now runs once as the last step; the old fixpoint { float-collections; dce; gcm } loop is gone
+
-- 
GitLab