diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index de21ba78d5fc511fcef122b633c37fcee1b242bc..29fc4df52960609a8f8155d5571b50641c70705b 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -22,7 +22,7 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a { const CHAN : u64 = 3; fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] { - @const let res : f32[CHAN, row, col]; + @res1 let res : f32[CHAN, row, col]; for chan = 0 to CHAN { for r = 0 to row { @@ -50,7 +50,7 @@ fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, ro } fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { - let res : f32[CHAN, row, col]; + @res2 let res : f32[CHAN, row, col]; for r = 1 to row-1 { for c = 1 to col-1 { @@ -129,7 +129,7 @@ fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, r fn transform<row : usize, col : usize> (input : f32[CHAN, row, col], tstw_trans : f32[CHAN, CHAN]) -> f32[CHAN, row, col] { - let result : f32[CHAN, row, col]; + @res let result : f32[CHAN, row, col]; for chan = 0 to CHAN { for r = 0 to row { @@ -152,11 +152,11 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( weights : f32[num_ctrl_pts, CHAN], coefs : f32[4, CHAN] ) -> f32[CHAN, row, col] { - let result : f32[CHAN, row, col]; - let l2_dist : f32[num_ctrl_pts]; + @res let result : f32[CHAN, row, col]; for r = 0 to row { for c = 0 to col { + @l2 let l2_dist : f32[num_ctrl_pts]; for cp = 0 to num_ctrl_pts { let v1 = input[0, r, c] - ctrl_pts[cp, 0]; let v2 = input[1, r, c] - ctrl_pts[cp, 1]; @@ -210,8 +210,8 @@ fn cava<r, c, num_ctrl_pts : usize>( @fuse1 let scaled = scale::<r, c>(input); @fuse1 let demosc = demosaic::<r, c>(scaled); @fuse2 let denosd = denoise::<r, c>(demosc); - let transf = transform::<r, c>(denosd, TsTw); - let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs); + @fuse3 let transf = transform::<r, c>(denosd, TsTw); + @fuse4 let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs); let tonemd = tone_map::<r, c>(gamutd, tonemap); let dscald = descale::<r, c>(tonemd); return dscald; diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index c75260ca0f10a5920598a9a37586446a7c950880..8099c0babec70af1446bd5d5778061051ee62045 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -16,11 +16,18 @@ inline(fuse1); let fuse2 = outline(cava@fuse2); inline(fuse2); +let fuse3 = outline(cava@fuse3); +inline(fuse3); + +let fuse4 = outline(cava@fuse4); +inline(fuse4); + ip-sroa(*); sroa(*); simpl!(*); -no-memset(fuse1@const); +no-memset(fuse1@res1); +no-memset(fuse1@res2); fixpoint { forkify(fuse1); fork-guard-elim(fuse1); @@ -45,6 +52,24 @@ array-slf(fuse2); simpl!(fuse2); unforkify(fuse2); -delete-uncalled(*); +no-memset(fuse3@res); +fixpoint { + forkify(fuse3); + fork-guard-elim(fuse3); + fork-coalesce(fuse3); +} +fork-split(fuse3); +unforkify(fuse3); +no-memset(fuse4@res); +no-memset(fuse4@l2); +fixpoint { + forkify(fuse4); + fork-guard-elim(fuse4); + fork-coalesce(fuse4); +} +fork-split(fuse4); +unforkify(fuse4); + +delete-uncalled(*); gcm(*); \ No newline at end of file