diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
index ead722ce0adfcf61ccc6dc79c70e1ba76d0e8eeb..cb65d183b95a9d4c2babe4e9cde70a347311cf05 100644
--- a/juno_samples/edge_detection/src/cpu.sch
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -24,6 +24,14 @@ predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
 predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
+let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
+fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
+simpl!(gaussian_smoothing, gaussian_smoothing_body);
 
 no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2);
 fixpoint {
@@ -32,6 +40,14 @@ fixpoint {
   fork-coalesce(laplacian_estimate);
 }
 simpl!(laplacian_estimate);
+let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let laplacian_estimate_body = outline(split._1_laplacian_estimate.fj2);
+fork-coalesce(laplacian_estimate, laplacian_estimate_body);
+simpl!(laplacian_estimate, laplacian_estimate_body);
 
 no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2);
 fixpoint {
@@ -40,6 +56,14 @@ fixpoint {
   fork-coalesce(zero_crossings);
 }
 simpl!(zero_crossings);
+let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
+fork-tile[4, 1, false, false](par);
+fork-tile[4, 0, false, false](par);
+fork-interchange[1, 2](par);
+let split = fork-split(par);
+let zero_crossings_body = outline(split._2_zero_crossings.fj2);
+fork-coalesce(zero_crossings, zero_crossings_body);
+simpl!(zero_crossings, zero_crossings_body);
 
 no-memset(gradient@res);
 fixpoint {
@@ -81,8 +105,8 @@ simpl!(reject_zero_crossings);
 
 async-call(edge_detection@le, edge_detection@zc);
 
-fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
-unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
+fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings);
+unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings);
 
 simpl!(*);
 
diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn
index e1413488e95d324e154ac478c2131db666fcbbf8..0b8e71da5b079a0828120dd347af20564271d39c 100644
--- a/juno_samples/edge_detection/src/edge_detection.jn
+++ b/juno_samples/edge_detection/src/edge_detection.jn
@@ -7,11 +7,11 @@ fn gaussian_smoothing<n, m, gs : usize>(
   // Define the gaussian radius as half the gaussian size
   const gr = gs / 2;
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       let smoothed = 0.0;
 
-      for i = 0 to gs {
+      @filter_loop for i = 0 to gs {
         for j = 0 to gs {
           let val = input[if row + i < gr               then 0
                           else if row + i - gr > n - 1  then n - 1
@@ -41,11 +41,11 @@ fn laplacian_estimate<n, m, sz: usize>(
 
   @res let result : f32[n, m];
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       // Copy data for dilation filter
       @shr1 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
                             else if row + i - r > n - 1 then MIN_BR
@@ -57,7 +57,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Compute pixel of dilated image
       let dilated = MIN_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           dilated = max!(dilated, imageArea[i, j] * structure[i, j]);
         }
@@ -65,7 +65,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Data copy for erotion filter
       @shr2 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
                             else if row + i - r > n - 1 then MAX_BR
@@ -77,7 +77,7 @@ fn laplacian_estimate<n, m, sz: usize>(
 
       // Compute pixel of eroded image
       let eroded = MAX_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           eroded = min!(eroded, imageArea[i, j] * structure[i, j]);
         }
@@ -99,11 +99,11 @@ fn zero_crossings<n, m, sz: usize>(
 
   @res let result : f32[n, m];
 
-  for row = 0 to n {
+  @image_loop for row = 0 to n {
     for col = 0 to m {
       // Data copy for dilation filter
       @shr1 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MIN_BR
                             else if row + i - r > n - 1 then MIN_BR
@@ -117,7 +117,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Compute the pixel of dilated image
       let dilated = MIN_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           dilated = max!(dilated, imageArea[i, j] * structure[i, j]);
         }
@@ -125,7 +125,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Data copy for erotion filter
       @shr2 let imageArea : f32[sz, sz];
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           imageArea[i, j] = if row + i < r              then MAX_BR
                             else if row + i - r > n - 1 then MAX_BR
@@ -139,7 +139,7 @@ fn zero_crossings<n, m, sz: usize>(
 
       // Compute the pixel of eroded image
       let eroded = MAX_BR;
-      for i = 0 to sz {
+      @filter_loop for i = 0 to sz {
         for j = 0 to sz {
           eroded = min!(eroded, imageArea[i, j] * structure[i, j]);
         }