diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn
index a06c8d7b03ca2cb962bb7c5c31e87cd3b3a7e502..720629e72b5e5085e7137d88302e54ef564c6ef7 100644
--- a/juno_samples/cava/src/cava.jn
+++ b/juno_samples/cava/src/cava.jn
@@ -1,7 +1,7 @@
 fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a {
   const n : usize = rows * cols;
 
-  let tmp : a[rows * cols];
+  @tmp let tmp : a[rows * cols];
   for i = 0 to rows * cols {
     tmp[i] = m[i / cols, i % cols];
   }
@@ -102,13 +102,13 @@ fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN,
 }
 
 fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] {
-  let res : f32[CHAN, row, col];
+  @res let res : f32[CHAN, row, col];
 
   for chan = 0 to CHAN {
     for r = 0 to row {
       for c = 0 to col {
         if r >= 1 && r < row - 1 && c >= 1 && c < col - 1 {
-          let filter : f32[3][3]; // same as [3, 3]
+          @filter let filter : f32[3][3]; // same as [3, 3]
           for i = 0 to 3 by 1 {
             for j = 0 to 3 by 1 {
               filter[i, j] = input[chan, r + i - 1, c + j - 1];
@@ -209,7 +209,7 @@ fn cava<r, c, num_ctrl_pts : usize>(
 ) -> u8[CHAN, r, c] {
   @fuse1 let scaled = scale::<r, c>(input);
   @fuse1 let demosc = demosaic::<r, c>(scaled);
-  let denosd = denoise::<r, c>(demosc);
+  @fuse2 let denosd = denoise::<r, c>(demosc);
   let transf = transform::<r, c>(denosd, TsTw);
   let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs);
   let tonemd = tone_map::<r, c>(gamutd, tonemap);
diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index f090eab35861e1a3977744a7dad8754e1ba9714e..b0479f5c157846d2f1d0bf3da541672979544a65 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -1,6 +1,7 @@
 macro simpl!(X) {
   ccp(X);
   simplify-cfg(X);
+  lift-dc-math(X);
   gvn(X);
   phi-elim(X);
   dce(X);
@@ -12,8 +13,8 @@ simpl!(*);
 let fuse1 = outline(cava@fuse1);
 inline(fuse1);
 
-inline(denoise);
-cpu(denoise, transform, gamut, tone_map, descale);
+let fuse2 = outline(cava@fuse2);
+inline(fuse2);
 
 ip-sroa(*);
 sroa(*);
@@ -28,9 +29,24 @@ fixpoint {
 simpl!(fuse1);
 array-slf(fuse1);
 simpl!(fuse1);
-xdot[true](fuse1);
 unforkify(fuse1);
 
+inline(fuse2);
+no-memset(fuse2@res);
+no-memset(fuse2@filter);
+no-memset(fuse2@tmp);
+fixpoint {
+  forkify(fuse2);
+  fork-guard-elim(fuse2);
+  fork-coalesce(fuse2);
+}
+simpl!(fuse2);
+array-slf(fuse2);
+simpl!(fuse2);
+array-slf(fuse2);
+simpl!(fuse2);
+xdot[true](fuse2);
+
 gcm(*);
 fixpoint {
   float-collections(*);