diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index 0ef466c00d08f3edffe02bad9612ce23ebf14768..0981804f8bdf5d38139a22f87967b95b28df86dc 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -109,18 +109,25 @@ fixpoint { fork-coalesce(fuse4); } simpl!(fuse4); -fork-unroll(fuse4@channel_loop); -simpl!(fuse4); -fixpoint { - fork-fusion(fuse4@channel_loop); + +if !feature("dont_fuse_gamut") { + fork-unroll(fuse4@channel_loop); + simpl!(fuse4); + fixpoint { + fork-fusion(fuse4@channel_loop); + } + simpl!(fuse4); } -simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); -fork-tile[2, 0, false, true](fuse4@channel_loop); -let out = fork-split(fuse4@channel_loop); -fork-unroll(out.cava_3.fj1); -unforkify(fuse4@channel_loop); +unforkify(fuse4@channel_loop | fuse4@cp_loop); + +fork-tile[4, 1, false, true](fuse4); +fork-tile[8, 0, false, true](fuse4); +fork-interchange[1, 2](fuse4); +let split = fork-split(fuse4); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -133,13 +140,6 @@ simpl!(fuse5); array-slf(fuse5); simpl!(fuse5); -fork-tile[4, 1, false, true](fuse4); -fork-tile[8, 0, false, true](fuse4); -fork-interchange[1, 2](fuse4); -let split = fork-split(fuse4); -fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); -fork-coalesce(split.cava_3.fj2); - delete-uncalled(*); simpl!(*);