diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index 7d788c2f099ca664d9e2ff751fde3394a1623cdd..32e2b63b55c69e6e2c05cabc3221f3c8feff674b 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -111,15 +111,11 @@ fixpoint {
   fork-fusion(fuse4@channel_loop);
 }
 simpl!(fuse4);
-
-if !feature("dont_fuse_gamut") {
-  array-slf(fuse4);
-  simpl!(fuse4);
-}
+array-slf(fuse4);
+simpl!(fuse4);
 
 if !feature("seq") {
   let par = fuse4@image_loop \ fuse4@channel_loop;
-  let par = par \ fuse4@cp_loop; 
   fork-tile[4, 1, false, false](par);
   fork-tile[8, 0, false, false](par);
   fork-interchange[1, 2](par);
@@ -128,6 +124,8 @@ if !feature("seq") {
   fork-coalesce(fuse4, fuse4_body);
   simpl!(fuse4, fuse4_body);
   fuse4 = fuse4_body;
+} else {
+  fork-tile[6, 0, false, true](fuse4@channel_loop);
 }
 
 no-memset(fuse5@res1);
diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch
index daf523391fac697c2c1edf8ee02f4f13141bfcd2..0ef466c00d08f3edffe02bad9612ce23ebf14768 100644
--- a/juno_samples/cava/src/gpu.sch
+++ b/juno_samples/cava/src/gpu.sch
@@ -115,25 +115,12 @@ fixpoint {
   fork-fusion(fuse4@channel_loop);
 }
 simpl!(fuse4);
-
-if !feature("dont_fuse_gamut") {
-  array-slf(fuse4);
-  simpl!(fuse4);
-  fork-tile[2, 0, false, true](fuse4@channel_loop);
-  let out = fork-split(fuse4@channel_loop);
-  fork-unroll(out.cava_3.fj1);
-  unforkify(fuse4@channel_loop);
-}
-
-let par = fuse4@image_loop \ fuse4@channel_loop;
-let par = par \ fuse4@cp_loop; 
-
-fork-tile[4, 1, false, true](par);
-fork-tile[8, 0, false, true](par);
-fork-interchange[1, 2](par);
-let split = fork-split(par);
-fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
-fork-coalesce(split.cava_3.fj2);
+array-slf(fuse4);
+simpl!(fuse4);
+fork-tile[2, 0, false, true](fuse4@channel_loop);
+let out = fork-split(fuse4@channel_loop);
+fork-unroll(out.cava_3.fj1);
+unforkify(fuse4@channel_loop);
 
 no-memset(fuse5@res1);
 no-memset(fuse5@res2);
@@ -146,7 +133,12 @@ simpl!(fuse5);
 array-slf(fuse5);
 simpl!(fuse5);
 
-
+fork-tile[4, 1, false, true](fuse4);
+fork-tile[8, 0, false, true](fuse4);
+fork-interchange[1, 2](fuse4);
+let split = fork-split(fuse4);
+fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
+fork-coalesce(split.cava_3.fj2);
 
 delete-uncalled(*);
 simpl!(*);