From 0c7afae76803f306266fc489e0cb8d9c6edf75fb Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sat, 1 Mar 2025 17:04:11 -0600
Subject: [PATCH] Tile part of the edge detection GPU schedule

---
 hercules_opt/src/fork_transforms.rs     |  7 +++++++
 juno_samples/edge_detection/src/gpu.sch | 27 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs
index e6db0345..8bd3f735 100644
--- a/hercules_opt/src/fork_transforms.rs
+++ b/hercules_opt/src/fork_transforms.rs
@@ -1169,6 +1169,7 @@ pub fn fork_dim_merge(
                     op: BinaryOperator::Rem,
                 });
                 edit.sub_edit(tid, rem);
+                edit.sub_edit(tid, outer_tid);
                 edit = edit.replace_all_uses(tid, rem)?;
             } else if tid_dim == inner_idx {
                 let outer_tid = Node::ThreadID {
@@ -1185,6 +1186,7 @@ pub fn fork_dim_merge(
                     op: BinaryOperator::Div,
                 });
                 edit.sub_edit(tid, div);
+                edit.sub_edit(tid, outer_tid);
                 edit = edit.replace_all_uses(tid, div)?;
             }
         }
@@ -1479,7 +1481,12 @@ fn fork_fusion(
     }
 
     // Perform the fusion.
+    let bottom_tids: Vec<_> = editor
+        .get_users(bottom_fork)
+        .filter(|id| nodes[id.idx()].is_thread_id())
+        .collect();
     editor.edit(|mut edit| {
+        edit = edit.replace_all_uses_where(bottom_fork, top_fork, |id| bottom_tids.contains(id))?;
         if bottom_join_pred != bottom_fork {
             // If there is control flow in the bottom fork-join, stitch it into
             // the top fork-join.
diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch
index 065a78f2..f8da90d0 100644
--- a/juno_samples/edge_detection/src/gpu.sch
+++ b/juno_samples/edge_detection/src/gpu.sch
@@ -26,6 +26,15 @@ predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
 predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
+fork-dim-merge(gaussian_smoothing@filter_loop);
+unforkify(gaussian_smoothing@filter_loop);
+simpl!(gaussian_smoothing);
+
+fork-dim-merge(gaussian_smoothing);
+fork-tile[32, 0, false, true](gaussian_smoothing);
+simpl!(gaussian_smoothing);
+fork-split(gaussian_smoothing);
+simpl!(gaussian_smoothing);
 
 no-memset(laplacian_estimate@res);
 fixpoint {
@@ -34,6 +43,15 @@ fixpoint {
   fork-coalesce(laplacian_estimate);
 }
 simpl!(laplacian_estimate);
+fork-dim-merge(laplacian_estimate@filter_loop);
+unforkify(laplacian_estimate@filter_loop);
+simpl!(laplacian_estimate);
+
+fork-dim-merge(laplacian_estimate);
+fork-tile[32, 0, false, true](laplacian_estimate);
+simpl!(laplacian_estimate);
+fork-split(laplacian_estimate);
+simpl!(laplacian_estimate);
 
 no-memset(zero_crossings@res);
 fixpoint {
@@ -42,6 +60,15 @@ fixpoint {
   fork-coalesce(zero_crossings);
 }
 simpl!(zero_crossings);
+fork-dim-merge(zero_crossings@filter_loop);
+unforkify(zero_crossings@filter_loop);
+simpl!(zero_crossings);
+
+fork-dim-merge(zero_crossings);
+fork-tile[32, 0, false, true](zero_crossings);
+simpl!(zero_crossings);
+fork-split(zero_crossings);
+simpl!(zero_crossings);
 
 no-memset(gradient@res);
 fixpoint {
-- 
GitLab