From 9d65ddb384437cb83b63479dabccbc4a1a998bc9 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Wed, 19 Feb 2025 09:53:33 -0600
Subject: [PATCH 1/3] Add CPU fork schedule for edge_detection; found issue in RT backend

---
 juno_samples/cava/src/cpu.sch                 | 13 +--
 juno_samples/edge_detection/build.rs          |  2 +
 juno_samples/edge_detection/src/cpu.sch       | 79 +++++++++++++++++++
 .../edge_detection/src/edge_detection.jn      | 28 +++----
 4 files changed, 97 insertions(+), 25 deletions(-)
 create mode 100644 juno_samples/edge_detection/src/cpu.sch

diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index 3ae1c6bf..3ac2f326 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -127,17 +127,8 @@ simpl!(fuse5);
 delete-uncalled(*);
 simpl!(*);
 
-
-fork-split(fuse1);
-unforkify(fuse1);
-fork-split(fuse2);
-unforkify(fuse2);
-fork-split(fuse3);
-unforkify(fuse3);
-fork-split(fuse4);
-unforkify(fuse4);
-fork-split(fuse5);
-unforkify(fuse5);
+fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
+unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
 
 simpl!(*);
 
diff --git a/juno_samples/edge_detection/build.rs b/juno_samples/edge_detection/build.rs
index 7071fae7..d5d6f7b7 100644
--- a/juno_samples/edge_detection/build.rs
+++ b/juno_samples/edge_detection/build.rs
@@ -14,6 +14,8 @@ fn main() {
     JunoCompiler::new()
         .file_in_src("edge_detection.jn")
         .unwrap()
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
new file mode 100644
index 00000000..3c3d09b3
--- /dev/null
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -0,0 +1,79 @@
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
+
+simpl!(*);
+
+ip-sroa(*);
+sroa(*);
+simpl!(*);
+
+no-memset(gaussian_smoothing@res);
+fixpoint {
+  forkify(gaussian_smoothing);
+  fork-guard-elim(gaussian_smoothing);
+  fork-coalesce(gaussian_smoothing);
+}
+predication(gaussian_smoothing);
+simpl!(gaussian_smoothing);
+predication(gaussian_smoothing);
+simpl!(gaussian_smoothing);
+
+no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);
+fixpoint {
+  forkify(laplacian_estimate);
+  fork-guard-elim(laplacian_estimate);
+  fork-coalesce(laplacian_estimate);
+}
+simpl!(laplacian_estimate);
+
+no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);
+fixpoint {
+  forkify(zero_crossings);
+  fork-guard-elim(zero_crossings);
+  fork-coalesce(zero_crossings);
+}
+simpl!(zero_crossings);
+
+no-memset(gradient@res);
+fixpoint {
+  forkify(gradient);
+  fork-guard-elim(gradient);
+  fork-coalesce(gradient);
+}
+predication(gradient);
+simpl!(gradient);
+predication(gradient);
+simpl!(gradient);
+
+fixpoint {
+  forkify(max_gradient);
+  fork-guard-elim(max_gradient);
+  fork-coalesce(max_gradient);
+}
+simpl!(max_gradient);
+
+no-memset(reject_zero_crossings@res);
+fixpoint {
+  forkify(reject_zero_crossings);
+  fork-guard-elim(reject_zero_crossings);
+  fork-coalesce(reject_zero_crossings);
+}
+predication(reject_zero_crossings);
+simpl!(reject_zero_crossings);
+
+async-call(edge_detection@le, edge_detection@zc);
+
+fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
+unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
+
+simpl!(*);
+
+delete-uncalled(*);
+gcm(*);
diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn
index d49258c5..3bc5bbfb 100644
--- a/juno_samples/edge_detection/src/edge_detection.jn
+++ b/juno_samples/edge_detection/src/edge_detection.jn
@@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>(
   input: f32[n, m],
   filter: f32[gs, gs],
 ) -> f32[n, m] {
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   // Define the gaussian radius as half the gaussian size
   const gr = gs / 2;
@@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>(
 ) -> f32[n, m] {
   const r = sz / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
       // Copy data for dilation filter
-      let imageArea : f32[sz, sz];
+      @shr1 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
@@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>(
       }
 
       // Data copy for erotion filter
-      let imageArea : f32[sz, sz];
+      @shr2 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
@@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>(
 ) -> f32[n, m] {
   const r = sz / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
       // Data copy for dilation filter
-      let imageArea : f32[sz, sz];
+      @shr1 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
@@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>(
       }
 
       // Data copy for erotion filter
-      let imageArea : f32[sz, sz];
+      @shr2 let imageArea : f32[sz, sz];
       for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
@@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>(
 ) -> f32[n, m] {
   const sbr = sb / 2;
 
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
@@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>(
   max_gradient: f32,
   theta: f32,
 ) -> f32[n, m] {
-  let result : f32[n, m];
+  @res let result : f32[n, m];
 
   for row = 0 to n {
     for col = 0 to m {
@@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>(
   sy: f32[sb, sb],
   theta: f32,
 ) -> f32[n, m] {
-  let smoothed  = gaussian_smoothing::<n, m, gs>(input, gaussian_filter);
-  let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure);
-  let zcs       = zero_crossings::<n, m, sz>(laplacian, structure);
-  let gradient  = gradient::<n, m, sb>(smoothed, sx, sy);
-  let maxgrad   = max_gradient::<n, m>(gradient);
+  let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter);
+  @le let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure);
+  @zc let zcs = zero_crossings::<n, m, sz>(laplacian, structure);
+  let gradient = gradient::<n, m, sb>(smoothed, sx, sy);
+  let maxgrad = max_gradient::<n, m>(gradient);
   return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta);
 }
-- 
GitLab


From 8734113e254ac8764a70237f0ce09293b2d62bfb Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Wed, 19 Feb 2025 10:43:42 -0600
Subject: [PATCH 2/3] Use channels to send values between async calls, wrap in
 arcs to share

---
 hercules_cg/src/rt.rs | 58 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 15 deletions(-)

diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs
index 5edddd86..bd152074 100644
--- a/hercules_cg/src/rt.rs
+++ b/hercules_cg/src/rt.rs
@@ -560,23 +560,33 @@ impl<'a> RTContext<'a> {
                 // same interface as AsyncRust functions.
                 let block = &mut blocks.get_mut(&bb).unwrap().data;
                 let is_async = func.schedules[id.idx()].contains(&Schedule::AsyncCall);
+                if is_async {
+                    for arg in args {
+                        if let Some(arc) = self.clone_arc(*arg, false) {
+                            write!(block, "{}", arc)?;
+                        }
+                    }
+                }
                 let device = self.devices[callee_id.idx()];
                 let prefix = match (device, is_async) {
-                    (Device::AsyncRust, false) => "",
-                    (_, false) => "",
-                    (Device::AsyncRust, true) => "Some(::async_std::task::spawn(",
-                    (_, true) => "Some(::async_std::task::spawn(async move {",
+                    (Device::AsyncRust, false) | (_, false) => {
+                        format!("{} = ", self.get_value(id, bb, true))
+                    }
+                    (_, true) => format!(
+                        "{}::async_std::task::spawn(async move {{ async_call_sender_{}.send(",
+                        self.clone_arc(id, true).unwrap(),
+                        id.idx()
+                    ),
                 };
                 let postfix = match (device, is_async) {
                     (Device::AsyncRust, false) => ".await",
                     (_, false) => "",
-                    (Device::AsyncRust, true) => "))",
-                    (_, true) => "}))",
+                    (Device::AsyncRust, true) => ".await).await})",
+                    (_, true) => ").await})",
                 };
                 write!(
                     block,
-                    "{} = {}{}(",
-                    self.get_value(id, bb, true),
+                    "{}{}(",
                     prefix,
                     self.module.functions[callee_id.idx()].name
                 )?;
@@ -1069,11 +1079,15 @@ impl<'a> RTContext<'a> {
             }
 
             // If the node is a call with an AsyncCall schedule, it should be
-            // spawned as a task and awaited later.
+            // lowered to a channel.
             let is_async_call =
                 func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall);
             if is_async_call {
-                write!(w, "let mut async_call_{} = None;", idx)?;
+                write!(
+                    w,
+                    "let mut async_call_channel_{} = ::async_std::channel::bounded(1);let async_call_sender_{} = ::std::sync::Arc::new(async_call_channel_{}.0);let async_call_receiver_{} = ::std::sync::Arc::new(async_call_channel_{}.1);",
+                    idx, idx, idx, idx, idx
+                )?;
             } else {
                 write!(
                     w,
@@ -1356,16 +1370,30 @@ impl<'a> RTContext<'a> {
         } else if func.nodes[id.idx()].is_call()
             && func.schedules[id.idx()].contains(&Schedule::AsyncCall)
         {
-            format!(
-                "async_call_{}{}",
-                id.idx(),
-                if lhs { "" } else { ".unwrap().await" }
-            )
+            assert!(!lhs);
+            format!("async_call_receiver_{}.recv().await.unwrap()", id.idx(),)
         } else {
             format!("node_{}", id.idx())
         }
     }
 
+    /// Emits a `let` that rebinds async call `id`'s channel sender (`lhs`) or receiver
+    /// to a clone of itself; `None` if `id` is not a call with an `AsyncCall` schedule.
+    fn clone_arc(&self, id: NodeID, lhs: bool) -> Option<String> {
+        let idx = id.idx();
+        let func = self.get_func();
+        let is_async_call =
+            func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall);
+        if !is_async_call {
+            return None;
+        }
+        let endpoint = if lhs { "sender" } else { "receiver" };
+        Some(format!(
+            "let async_call_{0}_{1} = async_call_{0}_{1}.clone();",
+            endpoint, idx
+        ))
+    }
+
     fn get_type(&self, id: TypeID) -> &'static str {
         convert_type(&self.module.types[id.idx()])
     }
-- 
GitLab


From 724116691fb2aed3a3dfdf352a7d2d635d1c1a8b Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Wed, 19 Feb 2025 10:48:52 -0600
Subject: [PATCH 3/3] fix: gate edge_detection lib on the opencv feature via #![cfg]

---
 juno_samples/edge_detection/Cargo.toml | 1 -
 juno_samples/edge_detection/src/lib.rs | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml
index fc5fb451..483724d8 100644
--- a/juno_samples/edge_detection/Cargo.toml
+++ b/juno_samples/edge_detection/Cargo.toml
@@ -15,7 +15,6 @@ required-features = ["opencv"]
 
 [lib]
 path = "src/lib.rs"
-required-features = ["opencv"]
 
 [build-dependencies]
 juno_build = { path = "../../juno_build" }
diff --git a/juno_samples/edge_detection/src/lib.rs b/juno_samples/edge_detection/src/lib.rs
index 37268b56..6c2a15bd 100644
--- a/juno_samples/edge_detection/src/lib.rs
+++ b/juno_samples/edge_detection/src/lib.rs
@@ -1,3 +1,4 @@
+#![cfg(feature = "opencv")]
 #![feature(concat_idents)]
 
 mod edge_detection_rust;
-- 
GitLab