From d83d48cf583e9e0454f048f2b9ebaff4f9c73ab8 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Mon, 24 Feb 2025 20:55:23 -0600
Subject: [PATCH] parallelize parts of edge

---
 juno_samples/edge_detection/src/cpu.sch       | 28 +++++++++++++++++--
 .../edge_detection/src/edge_detection.jn      | 24 ++++++++--------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
index ead722ce..cb65d183 100644
--- a/juno_samples/edge_detection/src/cpu.sch
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -24,6 +24,14 @@ predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
 predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
+let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
+fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
+simpl!(gaussian_smoothing, gaussian_smoothing_body);
 
 no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);
 fixpoint {
@@ -32,6 +40,14 @@ fixpoint {
   fork-coalesce(laplacian_estimate);
 }
 simpl!(laplacian_estimate);
+let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let laplacian_estimate_body = outline(split._1_laplacian_estimate.fj2);
+fork-coalesce(laplacian_estimate, laplacian_estimate_body);
+simpl!(laplacian_estimate, laplacian_estimate_body);
 
 no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);
 fixpoint {
@@ -40,6 +56,14 @@ fixpoint {
   fork-coalesce(zero_crossings);
 }
 simpl!(zero_crossings);
+let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let zero_crossings_body = outline(split._2_zero_crossings.fj2);
+fork-coalesce(zero_crossings, zero_crossings_body);
+simpl!(zero_crossings, zero_crossings_body);
 
 no-memset(gradient@res);
 fixpoint {
@@ -81,8 +105,8 @@ simpl!(reject_zero_crossings);
 
 async-call(edge_detection@le, edge_detection@zc);
 
-fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
-unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
+fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings);
+unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings);
 
 simpl!(*);
 
diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn
index e1413488..0b8e71da 100644
--- a/juno_samples/edge_detection/src/edge_detection.jn
+++ b/juno_samples/edge_detection/src/edge_detection.jn
@@ -7,11 +7,11 @@ fn gaussian_smoothing<n, m, gs : usize>(
   // Define the gaussian radius as half the gaussian size
   const gr = gs / 2;
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       let smoothed = 0.0;
 
-      for i = 0 to gs {
+      @filter_loop for i = 0 to gs {
         for j = 0 to gs {
           let val = input[if row + i < gr               then 0
                           else if row + i - gr > n - 1  then n - 1
@@ -41,11 +41,11 @@ fn laplacian_estimate<n, m, sz: usize>(
 
   @res let result : f32[n, m];
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       // Copy data for dilation filter
       @shr1 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
                             else if row + i - r > n - 1 then MIN_BR
@@ -57,7 +57,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Compute pixel of dilated image
       let dilated = MIN_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           dilated = max!(dilated, imageArea[i, j] * structure[i, j]);
         }
@@ -65,7 +65,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Data copy for erotion filter
       @shr2 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
                             else if row + i - r > n - 1 then MAX_BR
@@ -77,7 +77,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Compute pixel of eroded image
       let eroded = MAX_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           eroded = min!(eroded, imageArea[i, j] * structure[i, j]);
         }
@@ -99,11 +99,11 @@ fn zero_crossings<n, m, sz: usize>(
 
   @res let result : f32[n, m];
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       // Data copy for dilation filter
       @shr1 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
                             else if row + i - r > n - 1 then MIN_BR
@@ -117,7 +117,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Compute the pixel of dilated image
       let dilated = MIN_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           dilated = max!(dilated, imageArea[i, j] * structure[i, j]);
         }
@@ -125,7 +125,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Data copy for erotion filter
       @shr2 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
                             else if row + i - r > n - 1 then MAX_BR
@@ -139,7 +139,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Compute the pixel of eroded image
       let eroded = MAX_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           eroded = min!(eroded, imageArea[i, j] * structure[i, j]);
         }
-- 
GitLab