# Patch summary for juno_samples/cava/src/gpu.sch (fuse4 / fuse5 schedule section):
#  * The fork-unroll, simpl!, fixpoint { fork-fusion } and trailing simpl! steps on
#    fuse4@channel_loop are wrapped in `if !feature("dont_fuse_gamut") { ... }`, so they
#    run only when that feature check is false.
#  * The fork-tile[2, 0, false, true] / fork-split / fork-unroll(out.cava_3.fj1) steps on
#    fuse4@channel_loop are removed, and unforkify now targets
#    `fuse4@channel_loop | fuse4@cp_loop` instead of fuse4@channel_loop alone.
#  * The fuse4 fork-tile[4, 1] / fork-tile[8, 0] / fork-interchange[1, 2] / fork-split /
#    fork-coalesce sequence is moved unchanged from after the fuse5 array-slf/simpl! steps
#    to directly after the unforkify, ahead of the fuse5 no-memset lines.
diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch
index 0ef466c00d08f3edffe02bad9612ce23ebf14768..0981804f8bdf5d38139a22f87967b95b28df86dc 100644
--- a/juno_samples/cava/src/gpu.sch
+++ b/juno_samples/cava/src/gpu.sch
@@ -109,18 +109,25 @@ fixpoint {
   fork-coalesce(fuse4);
 }
 simpl!(fuse4);
-fork-unroll(fuse4@channel_loop);
-simpl!(fuse4);
-fixpoint {
-  fork-fusion(fuse4@channel_loop);
+
+if !feature("dont_fuse_gamut") {
+  fork-unroll(fuse4@channel_loop);
+  simpl!(fuse4);
+  fixpoint {
+    fork-fusion(fuse4@channel_loop);
+  }
+  simpl!(fuse4);
 }
-simpl!(fuse4);
 array-slf(fuse4);
 simpl!(fuse4);
-fork-tile[2, 0, false, true](fuse4@channel_loop);
-let out = fork-split(fuse4@channel_loop);
-fork-unroll(out.cava_3.fj1);
-unforkify(fuse4@channel_loop);
+unforkify(fuse4@channel_loop | fuse4@cp_loop);
+
+fork-tile[4, 1, false, true](fuse4);
+fork-tile[8, 0, false, true](fuse4);
+fork-interchange[1, 2](fuse4);
+let split = fork-split(fuse4);
+fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
+fork-coalesce(split.cava_3.fj2);
 
 no-memset(fuse5@res1);
 no-memset(fuse5@res2);
@@ -133,13 +140,6 @@ simpl!(fuse5);
 array-slf(fuse5);
 simpl!(fuse5);
 
-fork-tile[4, 1, false, true](fuse4);
-fork-tile[8, 0, false, true](fuse4);
-fork-interchange[1, 2](fuse4);
-let split = fork-split(fuse4);
-fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
-fork-coalesce(split.cava_3.fj2);
-
 delete-uncalled(*);
 simpl!(*);