diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 7d788c2f099ca664d9e2ff751fde3394a1623cdd..32e2b63b55c69e6e2c05cabc3221f3c8feff674b 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -111,15 +111,11 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); - -if !feature("dont_fuse_gamut") { - array-slf(fuse4); - simpl!(fuse4); -} +array-slf(fuse4); +simpl!(fuse4); if !feature("seq") { let par = fuse4@image_loop \ fuse4@channel_loop; - let par = par \ fuse4@cp_loop; fork-tile[4, 1, false, false](par); fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); @@ -128,6 +124,8 @@ if !feature("seq") { fork-coalesce(fuse4, fuse4_body); simpl!(fuse4, fuse4_body); fuse4 = fuse4_body; +} else { + fork-tile[6, 0, false, true](fuse4@channel_loop); } no-memset(fuse5@res1); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index daf523391fac697c2c1edf8ee02f4f13141bfcd2..0ef466c00d08f3edffe02bad9612ce23ebf14768 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -115,25 +115,12 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); - -if !feature("dont_fuse_gamut") { - array-slf(fuse4); - simpl!(fuse4); - fork-tile[2, 0, false, true](fuse4@channel_loop); - let out = fork-split(fuse4@channel_loop); - fork-unroll(out.cava_3.fj1); - unforkify(fuse4@channel_loop); -} - -let par = fuse4@image_loop \ fuse4@channel_loop; -let par = par \ fuse4@cp_loop; - -fork-tile[4, 1, false, true](par); -fork-tile[8, 0, false, true](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); -fork-coalesce(split.cava_3.fj2); +array-slf(fuse4); +simpl!(fuse4); +fork-tile[2, 0, false, true](fuse4@channel_loop); +let out = fork-split(fuse4@channel_loop); +fork-unroll(out.cava_3.fj1); +unforkify(fuse4@channel_loop); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -146,7 +133,12 @@ simpl!(fuse5); array-slf(fuse5); simpl!(fuse5); - +fork-tile[4, 1, false, true](fuse4); +fork-tile[8, 0, false, true](fuse4); +fork-interchange[1, 2](fuse4); +let split = fork-split(fuse4); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); delete-uncalled(*); simpl!(*);