diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index 29fc4df52960609a8f8155d5571b50641c70705b..366792c3cfcb9d20b35da0760ea5e408aa26e0c9 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -35,20 +35,6 @@ fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, return res; } -fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, row, col] { - let res : u8[CHAN, row, col]; - - for chan = 0 to CHAN { - for r = 0 to row { - for c = 0 to col { - res[chan, r, c] = min!::<f32>(max!::<f32>(input[chan, r, c] * 255, 0), 255) as u8; - } - } - } - - return res; -} - fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { @res2 let res : f32[CHAN, row, col]; @@ -184,7 +170,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( fn tone_map<row : usize, col:usize> (input : f32[CHAN, row, col], tone_map : f32[256, CHAN]) -> f32[CHAN, row, col] { - let result : f32[CHAN, row, col]; + @res1 let result : f32[CHAN, row, col]; for chan = 0 to CHAN { for r = 0 to row { @@ -198,6 +184,20 @@ fn tone_map<row : usize, col:usize> return result; } +fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, row, col] { + @res2 let res : u8[CHAN, row, col]; + + for chan = 0 to CHAN { + for r = 0 to row { + for c = 0 to col { + res[chan, r, c] = min!::<f32>(max!::<f32>(input[chan, r, c] * 255, 0), 255) as u8; + } + } + } + + return res; +} + #[entry] fn cava<r, c, num_ctrl_pts : usize>( input : u8[CHAN, r, c], @@ -212,7 +212,7 @@ fn cava<r, c, num_ctrl_pts : usize>( @fuse2 let denosd = denoise::<r, c>(demosc); @fuse3 let transf = transform::<r, c>(denosd, TsTw); @fuse4 let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs); - let tonemd = tone_map::<r, c>(gamutd, tonemap); - let dscald = descale::<r, c>(tonemd); + @fuse5 let tonemd = tone_map::<r, c>(gamutd, tonemap); + @fuse5 let dscald = descale::<r, c>(tonemd); return dscald; } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 8099c0babec70af1446bd5d5778061051ee62045..d51b8b943111950e73dc1f9dd5ab126b33ae6b14 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -22,6 +22,9 @@ inline(fuse3); let fuse4 = outline(cava@fuse4); inline(fuse4); +let fuse5 = outline(cava@fuse5); +inline(fuse5); + ip-sroa(*); sroa(*); simpl!(*); @@ -58,6 +61,7 @@ fixpoint { fork-guard-elim(fuse3); fork-coalesce(fuse3); } +simpl!(fuse3); fork-split(fuse3); unforkify(fuse3); @@ -68,8 +72,23 @@ fixpoint { fork-guard-elim(fuse4); fork-coalesce(fuse4); } +simpl!(fuse4); fork-split(fuse4); unforkify(fuse4); +no-memset(fuse5@res1); +no-memset(fuse5@res2); +fixpoint { + forkify(fuse5); + fork-guard-elim(fuse5); + fork-coalesce(fuse5); +} +simpl!(fuse5); +array-slf(fuse5); +simpl!(fuse5); +fork-split(fuse5); +unforkify(fuse5); + +simpl!(*); delete-uncalled(*); gcm(*); \ No newline at end of file