diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index a06c8d7b03ca2cb962bb7c5c31e87cd3b3a7e502..720629e72b5e5085e7137d88302e54ef564c6ef7 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -1,7 +1,7 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a { const n : usize = rows * cols; - let tmp : a[rows * cols]; + @tmp let tmp : a[rows * cols]; for i = 0 to rows * cols { tmp[i] = m[i / cols, i % cols]; } @@ -102,13 +102,13 @@ fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, } fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { - let res : f32[CHAN, row, col]; + @res let res : f32[CHAN, row, col]; for chan = 0 to CHAN { for r = 0 to row { for c = 0 to col { if r >= 1 && r < row - 1 && c >= 1 && c < col - 1 { - let filter : f32[3][3]; // same as [3, 3] + @filter let filter : f32[3][3]; // same as [3, 3] for i = 0 to 3 by 1 { for j = 0 to 3 by 1 { filter[i, j] = input[chan, r + i - 1, c + j - 1]; @@ -209,7 +209,7 @@ fn cava<r, c, num_ctrl_pts : usize>( ) -> u8[CHAN, r, c] { @fuse1 let scaled = scale::<r, c>(input); @fuse1 let demosc = demosaic::<r, c>(scaled); - let denosd = denoise::<r, c>(demosc); + @fuse2 let denosd = denoise::<r, c>(demosc); let transf = transform::<r, c>(denosd, TsTw); let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs); let tonemd = tone_map::<r, c>(gamutd, tonemap); diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index f090eab35861e1a3977744a7dad8754e1ba9714e..b0479f5c157846d2f1d0bf3da541672979544a65 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -1,6 +1,7 @@ macro simpl!(X) { ccp(X); simplify-cfg(X); + lift-dc-math(X); gvn(X); phi-elim(X); dce(X); @@ -12,8 +13,8 @@ simpl!(*); let fuse1 = outline(cava@fuse1); inline(fuse1); -inline(denoise); -cpu(denoise, transform, gamut, tone_map, descale); +let fuse2 = outline(cava@fuse2); +inline(fuse2); ip-sroa(*); sroa(*); @@ -28,9 +29,24 @@ fixpoint { simpl!(fuse1); array-slf(fuse1); simpl!(fuse1); -xdot[true](fuse1); unforkify(fuse1); +inline(fuse2); +no-memset(fuse2@res); +no-memset(fuse2@filter); +no-memset(fuse2@tmp); +fixpoint { + forkify(fuse2); + fork-guard-elim(fuse2); + fork-coalesce(fuse2); +} +simpl!(fuse2); +array-slf(fuse2); +simpl!(fuse2); +array-slf(fuse2); +simpl!(fuse2); +xdot[true](fuse2); + gcm(*); fixpoint { float-collections(*);