From 9d65ddb384437cb83b63479dabccbc4a1a998bc9 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Wed, 19 Feb 2025 09:53:33 -0600
Subject: [PATCH] edge fork schedule, found issue in RT backend

---
 juno_samples/cava/src/cpu.sch                 | 13 +--
 juno_samples/edge_detection/build.rs          |  2 +
 juno_samples/edge_detection/src/cpu.sch       | 79 +++++++++++++++++++
 .../edge_detection/src/edge_detection.jn      | 28 +++----
 4 files changed, 97 insertions(+), 25 deletions(-)
 create mode 100644 juno_samples/edge_detection/src/cpu.sch

diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index 3ae1c6bf..3ac2f326 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -127,17 +127,8 @@ simpl!(fuse5);
 delete-uncalled(*);
 simpl!(*);
 
-
-fork-split(fuse1);
-unforkify(fuse1);
-fork-split(fuse2);
-unforkify(fuse2);
-fork-split(fuse3);
-unforkify(fuse3);
-fork-split(fuse4);
-unforkify(fuse4);
-fork-split(fuse5);
-unforkify(fuse5);
+fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
+unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
 
 simpl!(*);
 
diff --git a/juno_samples/edge_detection/build.rs b/juno_samples/edge_detection/build.rs
index 7071fae7..d5d6f7b7 100644
--- a/juno_samples/edge_detection/build.rs
+++ b/juno_samples/edge_detection/build.rs
@@ -14,6 +14,8 @@ fn main() {
     JunoCompiler::new()
         .file_in_src("edge_detection.jn")
         .unwrap()
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
new file mode 100644
index 00000000..3c3d09b3
--- /dev/null
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -0,0 +1,79 @@
+macro simpl!(X) {  // Cleanup bundle: runs the seven passes below, in this order, on selection X.
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
+
+simpl!(*);  // First cleanup over `*` (presumably "all functions" - confirm selector semantics).
+
+ip-sroa(*);  // NOTE(review): presumably the interprocedural variant of sroa - confirm in pass docs.
+sroa(*);
+simpl!(*);  // Re-run the cleanup bundle after both sroa passes.
+
+no-memset(gaussian_smoothing@res);  // `@res` is the label put on `result` in gaussian_smoothing (edge_detection.jn).
+fixpoint {  // NOTE(review): presumably repeats the body until the IR stops changing - confirm.
+  forkify(gaussian_smoothing);
+  fork-guard-elim(gaussian_smoothing);
+  fork-coalesce(gaussian_smoothing);
+}
+predication(gaussian_smoothing);  // predication + simpl! is deliberately issued twice for this function.
+simpl!(gaussian_smoothing);
+predication(gaussian_smoothing);
+simpl!(gaussian_smoothing);
+
+no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);  // `@shr1`/`@shr2` label the two `imageArea` arrays.
+fixpoint {  // Same three fork passes as above, applied to laplacian_estimate.
+  forkify(laplacian_estimate);
+  fork-guard-elim(laplacian_estimate);
+  fork-coalesce(laplacian_estimate);
+}
+simpl!(laplacian_estimate);  // No predication step is scheduled for this function.
+
+no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);  // Same three labels as in laplacian_estimate.
+fixpoint {  // Same three fork passes, applied to zero_crossings.
+  forkify(zero_crossings);
+  fork-guard-elim(zero_crossings);
+  fork-coalesce(zero_crossings);
+}
+simpl!(zero_crossings);  // No predication step is scheduled for this function.
+
+no-memset(gradient@res);  // `@res` labels `result` in gradient.
+fixpoint {  // Same three fork passes, applied to gradient.
+  forkify(gradient);
+  fork-guard-elim(gradient);
+  fork-coalesce(gradient);
+}
+predication(gradient);  // As with gaussian_smoothing, predication + simpl! is issued twice.
+simpl!(gradient);
+predication(gradient);
+simpl!(gradient);
+
+fixpoint {  // max_gradient has no no-memset line: this patch adds no `@` label inside that function.
+  forkify(max_gradient);
+  fork-guard-elim(max_gradient);
+  fork-coalesce(max_gradient);
+}
+simpl!(max_gradient);
+
+no-memset(reject_zero_crossings@res);  // `@res` labels `result` in reject_zero_crossings.
+fixpoint {  // Same three fork passes, applied to reject_zero_crossings.
+  forkify(reject_zero_crossings);
+  fork-guard-elim(reject_zero_crossings);
+  fork-coalesce(reject_zero_crossings);
+}
+predication(reject_zero_crossings);  // Only one predication + simpl! round here (two for gaussian_smoothing and gradient).
+simpl!(reject_zero_crossings);
+
+async-call(edge_detection@le, edge_detection@zc);  // `@le`/`@zc` label the laplacian_estimate and zero_crossings calls in edge_detection.
+
+fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);  // All six stage functions; same fork-split/unforkify pairing as cava's cpu.sch.
+unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
+
+simpl!(*);  // Final cleanup over the whole selection.
+
+delete-uncalled(*);  // NOTE(review): cava's cpu.sch runs delete-uncalled before a simpl!(*); here it runs after the last one - confirm intended.
+gcm(*);  // Last pass in this schedule.
diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn
index d49258c5..3bc5bbfb 100644
--- a/juno_samples/edge_detection/src/edge_detection.jn
+++ b/juno_samples/edge_detection/src/edge_detection.jn
@@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>(
   input: f32[n, m],
   filter: f32[gs, gs],
 ) -> f32[n, m] {
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   // Define the gaussian radius as half the gaussian size
   const gr = gs / 2;
@@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>(
 ) -> f32[n, m] {
   const r = sz / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
       // Copy data for dilation filter
-      let imageArea : f32[sz, sz];
+      @shr1 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
@@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>(
       }
 
       // Data copy for erotion filter
-      let imageArea : f32[sz, sz];
+      @shr2 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
@@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>(
 ) -> f32[n, m] {
   const r = sz / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
       // Data copy for dilation filter
-      let imageArea : f32[sz, sz];
+      @shr1 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
@@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>(
       }
 
       // Data copy for erotion filter
-      let imageArea : f32[sz, sz];
+      @shr2 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
@@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>(
 ) -> f32[n, m] {
   const sbr = sb / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
@@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>(
   max_gradient: f32,
   theta: f32,
 ) -> f32[n, m] {
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
@@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>(
   sy: f32[sb, sb],
   theta: f32,
 ) -> f32[n, m] {
-  let smoothed  = gaussian_smoothing::<n, m, gs>(input, gaussian_filter);
-  let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure);
-  let zcs       = zero_crossings::<n, m, sz>(laplacian, structure);
-  let gradient  = gradient::<n, m, sb>(smoothed, sx, sy);
-  let maxgrad   = max_gradient::<n, m>(gradient);
+  let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter);
+  @le let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure);
+  @zc let zcs = zero_crossings::<n, m, sz>(laplacian, structure);
+  let gradient = gradient::<n, m, sb>(smoothed, sx, sy);
+  let maxgrad = max_gradient::<n, m>(gradient);
   return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta);
 }
-- 
GitLab