From d83d48cf583e9e0454f048f2b9ebaff4f9c73ab8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 24 Feb 2025 20:55:23 -0600 Subject: [PATCH] parallelize parts of edge --- juno_samples/edge_detection/src/cpu.sch | 28 +++++++++++++++++-- .../edge_detection/src/edge_detection.jn | 24 ++++++++-------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch index ead722ce..cb65d183 100644 --- a/juno_samples/edge_detection/src/cpu.sch +++ b/juno_samples/edge_detection/src/cpu.sch @@ -24,6 +24,14 @@ predication(gaussian_smoothing); simpl!(gaussian_smoothing); predication(gaussian_smoothing); simpl!(gaussian_smoothing); +let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop; +fork-tile[4, 1, false, false](par); +fork-tile[4, 0, false, false](par); +fork-interchange[1, 2](par); +let split = fork-split(par); +let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); +fork-coalesce(gaussian_smoothing, gaussian_smoothing_body); +simpl!(gaussian_smoothing, gaussian_smoothing_body); no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2); fixpoint { @@ -32,6 +40,14 @@ fixpoint { fork-coalesce(laplacian_estimate); } simpl!(laplacian_estimate); +let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop; +fork-tile[4, 1, false, false](par); +fork-tile[4, 0, false, false](par); +fork-interchange[1, 2](par); +let split = fork-split(par); +let laplacian_estimate_body = outline(split._1_laplacian_estimate.fj2); +fork-coalesce(laplacian_estimate, laplacian_estimate_body); +simpl!(laplacian_estimate, laplacian_estimate_body); no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2); fixpoint { @@ -40,6 +56,14 @@ fixpoint { fork-coalesce(zero_crossings); } simpl!(zero_crossings); +let par = zero_crossings@image_loop \ zero_crossings@filter_loop; +fork-tile[4, 1, false, false](par); +fork-tile[4, 0, false, false](par); +fork-interchange[1, 2](par); +let split = fork-split(par); +let zero_crossings_body = outline(split._2_zero_crossings.fj2); +fork-coalesce(zero_crossings, zero_crossings_body); +simpl!(zero_crossings, zero_crossings_body); no-memset(gradient@res); fixpoint { @@ -81,8 +105,8 @@ simpl!(reject_zero_crossings); async-call(edge_detection@le, edge_detection@zc); -fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings); -unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings); +fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings); +unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings); simpl!(*); diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn index e1413488..0b8e71da 100644 --- a/juno_samples/edge_detection/src/edge_detection.jn +++ b/juno_samples/edge_detection/src/edge_detection.jn @@ -7,11 +7,11 @@ fn gaussian_smoothing<n, m, gs : usize>( // Define the gaussian radius as half the gaussian size const gr = gs / 2; - for row = 0 to n { + @image_loop for row = 0 to n { for col = 0 to m { let smoothed = 0.0; - for i = 0 to gs { + @filter_loop for i = 0 to gs { for j = 0 to gs { let val = input[if row + i < gr then 0 else if row + i - gr > n - 1 then n - 1 @@ -41,11 +41,11 @@ fn laplacian_estimate<n, m, sz: usize>( @res let result : f32[n, m]; - for row = 0 to n { + @image_loop for row = 0 to n { for col = 0 to m { // Copy data for dilation filter @shr1 let imageArea : f32[sz, sz]; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR else if row + i - r > n - 1 then MIN_BR @@ -57,7 +57,7 @@ fn laplacian_estimate<n, m, sz: usize>( // Compute pixel of dilated image let dilated = MIN_BR; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { dilated = max!(dilated, imageArea[i, j] * structure[i, j]); } @@ -65,7 +65,7 @@ fn laplacian_estimate<n, m, sz: usize>( // Data copy for erotion filter @shr2 let imageArea : f32[sz, sz]; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR else if row + i - r > n - 1 then MAX_BR @@ -77,7 +77,7 @@ fn laplacian_estimate<n, m, sz: usize>( // Compute pixel of eroded image let eroded = MAX_BR; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { eroded = min!(eroded, imageArea[i, j] * structure[i, j]); } @@ -99,11 +99,11 @@ fn zero_crossings<n, m, sz: usize>( @res let result : f32[n, m]; - for row = 0 to n { + @image_loop for row = 0 to n { for col = 0 to m { // Data copy for dilation filter @shr1 let imageArea : f32[sz, sz]; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR else if row + i - r > n - 1 then MIN_BR @@ -117,7 +117,7 @@ fn zero_crossings<n, m, sz: usize>( // Compute the pixel of dilated image let dilated = MIN_BR; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { dilated = max!(dilated, imageArea[i, j] * structure[i, j]); } @@ -125,7 +125,7 @@ fn zero_crossings<n, m, sz: usize>( // Data copy for erotion filter @shr2 let imageArea : f32[sz, sz]; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR else if row + i - r > n - 1 then MAX_BR @@ -139,7 +139,7 @@ fn zero_crossings<n, m, sz: usize>( // Compute the pixel of eroded image let eroded = MAX_BR; - for i = 0 to sz { + @filter_loop for i = 0 to sz { for j = 0 to sz { eroded = min!(eroded, imageArea[i, j] * structure[i, j]); } -- GitLab