diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn
index ebd58206e033f85dc40848273ac4c40b4abbaace..3e49cb365b186037cf6f380c3aa6a4d3b483fb5b 100644
--- a/juno_samples/edge_detection/src/edge_detection.jn
+++ b/juno_samples/edge_detection/src/edge_detection.jn
@@ -138,7 +138,7 @@ fn gradient<n, m, sb: usize>(
       let gx = 0;
       let gy = 0;
 
-      for i = 0 to sb {
+      @filter_loop for i = 0 to sb {
         for j = 0 to sb {
           let val = input[if row + i < sbr              then 0
                           else if row + i - sbr > n - 1 then n - 1
diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch
index f8da90d06e8ec82c200048b04dedfe19281ac0dd..666f6cef30f0c0ecedef57f02e50bc25d2b26b8f 100644
--- a/juno_samples/edge_detection/src/gpu.sch
+++ b/juno_samples/edge_detection/src/gpu.sch
@@ -80,6 +80,15 @@ predication(gradient);
 simpl!(gradient);
 predication(gradient);
 simpl!(gradient);
+fork-dim-merge(gradient@filter_loop);
+unforkify(gradient@filter_loop);
+simpl!(gradient);
+
+fork-dim-merge(gradient);
+fork-tile[32, 0, false, true](gradient);
+simpl!(gradient);
+fork-split(gradient);
+simpl!(gradient);
 
 fixpoint {
   forkify(max_gradient);
@@ -115,6 +124,12 @@ fixpoint {
 predication(reject_zero_crossings);
 simpl!(reject_zero_crossings);
 
+fork-dim-merge(reject_zero_crossings);
+fork-tile[32, 0, false, true](reject_zero_crossings);
+simpl!(reject_zero_crossings);
+fork-split(reject_zero_crossings);
+simpl!(reject_zero_crossings);
+
 async-call(edge_detection@le, edge_detection@zc);
 
 simpl!(*);