diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index 4d02b2cdb5f843774ebae9b873388e03c82071b0..931e78f8196eba77ff3f9015ab7f4561b1153c57 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -145,7 +145,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( @image_loop for r = 0 to row { for c = 0 to col { @l2 let l2_dist : f32[num_ctrl_pts]; - for cp = 0 to num_ctrl_pts { + @cp_loop for cp = 0 to num_ctrl_pts { let v1 = input[0, r, c] - ctrl_pts[cp, 0]; let v2 = input[1, r, c] - ctrl_pts[cp, 1]; let v3 = input[2, r, c] - ctrl_pts[cp, 2]; @@ -155,7 +155,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( @channel_loop for chan = 0 to CHAN { let chan_val : f32 = 0.0; - for cp = 0 to num_ctrl_pts { + @cp_loop for cp = 0 to num_ctrl_pts { chan_val += l2_dist[cp] * weights[cp, chan]; } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 8f22b37d4c3a77a31e8d2467c8e6e130c281513f..6fc8adbb1fff30856978ce201af09fd2e02d6298 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -115,7 +115,7 @@ array-slf(fuse4); simpl!(fuse4); let par = fuse4@image_loop \ fuse4@channel_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let fuse4_body = outline(split.cava_3.fj2); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index bacfd3abca363a6dd93496c1adb23fa54c860b9f..aa1df3904fd8fa700ba5da50b1d1217ee43a576e 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -117,9 +117,9 @@ fixpoint { simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); -//fork-tile[2, 0, false, true](fuse4@channel_loop); -//fork-split(fuse4@channel_loop); -//clean-monoid-reduces(fuse4); +fork-tile[2, 0, false, true](fuse4@channel_loop); +let out = fork-split(fuse4@channel_loop); +fork-unroll(out.cava_3.fj1); unforkify(fuse4@channel_loop); no-memset(fuse5@res1); diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch index ec9e423dc4c160d08b61eaf45d2b75329886f94e..b46523f34631c87e17bd7c9b2dedddc50acf06e9 100644 --- a/juno_samples/edge_detection/src/cpu.sch +++ b/juno_samples/edge_detection/src/cpu.sch @@ -26,7 +26,7 @@ predication(gaussian_smoothing); simpl!(gaussian_smoothing); let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); @@ -42,7 +42,7 @@ fixpoint { simpl!(laplacian_estimate); let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let body = split._1_laplacian_estimate.fj2 | laplacian_estimate.shr1 | laplacian_estimate.shr2; @@ -59,7 +59,7 @@ fixpoint { simpl!(zero_crossings); let par = zero_crossings@image_loop \ zero_crossings@filter_loop; fork-tile[4, 1, false, false](par); -fork-tile[4, 0, false, false](par); +fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); let split = fork-split(par); let body = split._2_zero_crossings.fj2 | zero_crossings.shr1 | zero_crossings.shr2; @@ -86,7 +86,7 @@ fixpoint { simpl!(max_gradient); fork-dim-merge(max_gradient); simpl!(max_gradient); -fork-tile[16, 0, false, false](max_gradient); +fork-tile[32, 0, false, false](max_gradient); let split = fork-split(max_gradient); clean-monoid-reduces(max_gradient); let out = outline(split._4_max_gradient.fj1); @@ -105,7 +105,7 @@ fixpoint { predication(reject_zero_crossings); simpl!(reject_zero_crossings); fork-tile[4, 1, false, false](reject_zero_crossings); -fork-tile[4, 0, false, false](reject_zero_crossings); +fork-tile[8, 0, false, false](reject_zero_crossings); fork-interchange[1, 2](reject_zero_crossings); let split = fork-split(reject_zero_crossings); let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);