From e37ad2b482dfe3fd23723b5b0f8015754e447bea Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Sat, 1 Mar 2025 10:42:33 -0600
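Subject: [PATCH] Tune fork-tile factors in cava and edge_detection schedules

Schedule tuning for the cava and edge_detection samples:

- cava/cava.jn: label both `for cp` loops in gamut with @cp_loop.
- cava/cpu.sch: change fork-tile[4, 0, ...] to fork-tile[8, 0, ...].
- cava/gpu.sch: re-enable the previously commented-out fork-tile and
  fork-split on fuse4@channel_loop, and call fork-unroll on the split
  result (out.cava_3.fj1) where the commented-out clean-monoid-reduces
  call used to be.
- edge_detection/cpu.sch: change fork-tile[4, 0, ...] to
  fork-tile[8, 0, ...] for gaussian_smoothing, laplacian_estimate,
  zero_crossings and reject_zero_crossings, and fork-tile[16, 0, ...]
  to fork-tile[32, 0, ...] for max_gradient.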

---
 juno_samples/cava/src/cava.jn           |  4 ++--
 juno_samples/cava/src/cpu.sch           |  2 +-
 juno_samples/cava/src/gpu.sch           |  6 +++---
 juno_samples/edge_detection/src/cpu.sch | 10 +++++-----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn
index 4d02b2cd..931e78f8 100644
--- a/juno_samples/cava/src/cava.jn
+++ b/juno_samples/cava/src/cava.jn
@@ -145,7 +145,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
   @image_loop for r = 0 to row {
     for c = 0 to col {
       @l2 let l2_dist : f32[num_ctrl_pts];
-      for cp = 0 to num_ctrl_pts {
+      @cp_loop for cp = 0 to num_ctrl_pts {
         let v1 = input[0, r, c] - ctrl_pts[cp, 0];
         let v2 = input[1, r, c] - ctrl_pts[cp, 1];
         let v3 = input[2, r, c] - ctrl_pts[cp, 2];
@@ -155,7 +155,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
      
       @channel_loop for chan = 0 to CHAN {
         let chan_val : f32 = 0.0;
-        for cp = 0 to num_ctrl_pts {
+        @cp_loop for cp = 0 to num_ctrl_pts {
           chan_val += l2_dist[cp] * weights[cp, chan];
         }
 
diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index 8f22b37d..6fc8adbb 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -115,7 +115,7 @@ array-slf(fuse4);
 simpl!(fuse4);
 let par = fuse4@image_loop \ fuse4@channel_loop;
 fork-tile[4, 1, false, false](par);
-fork-tile[4, 0, false, false](par);
+fork-tile[8, 0, false, false](par);
 fork-interchange[1, 2](par);
 let split = fork-split(par);
 let fuse4_body = outline(split.cava_3.fj2);
diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch
index bacfd3ab..aa1df390 100644
--- a/juno_samples/cava/src/gpu.sch
+++ b/juno_samples/cava/src/gpu.sch
@@ -117,9 +117,9 @@ fixpoint {
 simpl!(fuse4);
 array-slf(fuse4);
 simpl!(fuse4);
-//fork-tile[2, 0, false, true](fuse4@channel_loop);
-//fork-split(fuse4@channel_loop);
-//clean-monoid-reduces(fuse4);
+fork-tile[2, 0, false, true](fuse4@channel_loop);
+let out = fork-split(fuse4@channel_loop);
+fork-unroll(out.cava_3.fj1);
 unforkify(fuse4@channel_loop);
 
 no-memset(fuse5@res1);
diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
index ec9e423d..b46523f3 100644
--- a/juno_samples/edge_detection/src/cpu.sch
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -26,7 +26,7 @@ predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
 let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
 fork-tile[4, 1, false, false](par);
-fork-tile[4, 0, false, false](par);
+fork-tile[8, 0, false, false](par);
 fork-interchange[1, 2](par);
 let split = fork-split(par);
 let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
@@ -42,7 +42,7 @@ fixpoint {
 simpl!(laplacian_estimate);
 let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
 fork-tile[4, 1, false, false](par);
-fork-tile[4, 0, false, false](par);
+fork-tile[8, 0, false, false](par);
 fork-interchange[1, 2](par);
 let split = fork-split(par);
 let body = split._1_laplacian_estimate.fj2 | laplacian_estimate.shr1 | laplacian_estimate.shr2;
@@ -59,7 +59,7 @@ fixpoint {
 simpl!(zero_crossings);
 let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
 fork-tile[4, 1, false, false](par);
-fork-tile[4, 0, false, false](par);
+fork-tile[8, 0, false, false](par);
 fork-interchange[1, 2](par);
 let split = fork-split(par);
 let body = split._2_zero_crossings.fj2 | zero_crossings.shr1 | zero_crossings.shr2;
@@ -86,7 +86,7 @@ fixpoint {
 simpl!(max_gradient);
 fork-dim-merge(max_gradient);
 simpl!(max_gradient);
-fork-tile[16, 0, false, false](max_gradient);
+fork-tile[32, 0, false, false](max_gradient);
 let split = fork-split(max_gradient);
 clean-monoid-reduces(max_gradient);
 let out = outline(split._4_max_gradient.fj1);
@@ -105,7 +105,7 @@ fixpoint {
 predication(reject_zero_crossings);
 simpl!(reject_zero_crossings);
 fork-tile[4, 1, false, false](reject_zero_crossings);
-fork-tile[4, 0, false, false](reject_zero_crossings);
+fork-tile[8, 0, false, false](reject_zero_crossings);
 fork-interchange[1, 2](reject_zero_crossings);
 let split = fork-split(reject_zero_crossings);
 let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
-- 
GitLab