From e37ad2b482dfe3fd23723b5b0f8015754e447bea Mon Sep 17 00:00:00 2001 From: Russel Arbore <rarbore2@illinois.edu> Date: Sat, 1 Mar 2025 10:42:33 -0600 Subject: [PATCH] some tuning --- juno_samples/cava/src/cava.jn | 4 ++-- juno_samples/cava/src/cpu.sch | 2 +- juno_samples/cava/src/gpu.sch | 6 +++--- juno_samples/edge_detection/src/cpu.sch | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index 4d02b2cd..931e78f8 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -145,7 +145,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( @image_loop for r = 0 to row { for c = 0 to col { @l2 let l2_dist : f32[num_ctrl_pts]; - for cp = 0 to num_ctrl_pts { + @cp_loop for cp = 0 to num_ctrl_pts { let v1 = input[0, r, c] - ctrl_pts[cp, 0]; let v2 = input[1, r, c] - ctrl_pts[cp, 1]; let v3 = input[2, r, c] - ctrl_pts[cp, 2]; @@ -155,7 +155,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( @channel_loop for chan = 0 to CHAN { let chan_val : f32 = 0.0; - for cp = 0 to num_ctrl_pts { + @cp_loop for cp = 0 to num_ctrl_pts { chan_val += l2_dist[cp] * weights[cp, chan]; } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 8f22b37d..6fc8adbb 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -115,7 +115,7 @@ array-slf(fuse4); simpl!(fuse4); let par = fuse4@image_loop \ fuse4@channel_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let fuse4_body = outline(split.cava_3.fj2); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index bacfd3ab..aa1df390 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -117,9 +117,9 @@ fixpoint { simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); -//fork-tile[2, 0, false, true](fuse4@channel_loop); -//fork-split(fuse4@channel_loop); -//clean-monoid-reduces(fuse4); +fork-tile[2, 0, false, true](fuse4@channel_loop); +let out = fork-split(fuse4@channel_loop); +fork-unroll(out.cava_3.fj1); unforkify(fuse4@channel_loop); no-memset(fuse5@res1); diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch index ec9e423d..b46523f3 100644 --- a/juno_samples/edge_detection/src/cpu.sch +++ b/juno_samples/edge_detection/src/cpu.sch @@ -26,7 +26,7 @@ predication(gaussian_smoothing); simpl!(gaussian_smoothing); let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); @@ -42,7 +42,7 @@ fixpoint { simpl!(laplacian_estimate); let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let body = split._1_laplacian_estimate.fj2 | laplacian_estimate.shr1 | laplacian_estimate.shr2; @@ -59,7 +59,7 @@ fixpoint { simpl!(zero_crossings); let par = zero_crossings@image_loop \ zero_crossings@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let body = split._2_zero_crossings.fj2 | zero_crossings.shr1 | zero_crossings.shr2; @@ -86,7 +86,7 @@ fixpoint { simpl!(max_gradient); fork-dim-merge(max_gradient); simpl!(max_gradient); -fork-tile[16, 0, false, false](max_gradient); +fork-tile[32, 0, false, false](max_gradient); let split = fork-split(max_gradient); clean-monoid-reduces(max_gradient); let out = outline(split._4_max_gradient.fj1); @@ -105,7 +105,7 @@ fixpoint { predication(reject_zero_crossings); simpl!(reject_zero_crossings); fork-tile[4, 1, false, false](reject_zero_crossings); -fork-tile[4, 0, false, false](reject_zero_crossings); +fork-tile[8, 0, false, false](reject_zero_crossings); fork-interchange[1, 2](reject_zero_crossings); let split = fork-split(reject_zero_crossings); let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2); -- GitLab