diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index 0e94397377ed7c6d69b1b7641e7d9040f5753ad0..ff0f0283767996914e8f9b2274ed9a6d538b1812 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -916,7 +916,9 @@ pub fn chunk_all_forks_unguarded( }; for (fork, _) in fork_join_map { - chunk_fork_unguarded(editor, *fork, dim_idx, dc_id, order); + if editor.is_mutable(*fork) { + chunk_fork_unguarded(editor, *fork, dim_idx, dc_id, order); + } } } // Splits a dimension of a single fork join into multiple. diff --git a/juno_samples/cava/src/cava.jn b/juno_samples/cava/src/cava.jn index dbe799f9f23e63ed40157c9d57f5c4c8d9b4eb23..4d02b2cdb5f843774ebae9b873388e03c82071b0 100644 --- a/juno_samples/cava/src/cava.jn +++ b/juno_samples/cava/src/cava.jn @@ -142,7 +142,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( ) -> f32[CHAN, row, col] { @res let result : f32[CHAN, row, col]; - for r = 0 to row { + @image_loop for r = 0 to row { for c = 0 to col { @l2 let l2_dist : f32[num_ctrl_pts]; for cp = 0 to num_ctrl_pts { diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index efa7302eec7556494717ef8acacfeccd22011d86..8f22b37d4c3a77a31e8d2467c8e6e130c281513f 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -113,6 +113,14 @@ fixpoint { simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); +let par = fuse4@image_loop \ fuse4@channel_loop; +fork-tile[4, 1, false, false](par); +fork-tile[4, 0, false, false](par); +fork-interchange[1, 2](par); +let split = fork-split(par); +let fuse4_body = outline(split.cava_3.fj2); +fork-coalesce(fuse4, fuse4_body); +simpl!(fuse4, fuse4_body); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -128,8 +136,8 @@ simpl!(fuse5); delete-uncalled(*); simpl!(*); -fork-split(fuse1, fuse2, fuse3, fuse4, fuse5); -unforkify(fuse1, fuse2, fuse3, fuse4, fuse5); +fork-split(fuse1, fuse2, fuse3, fuse4_body, fuse5); +unforkify(fuse1, fuse2, fuse3, fuse4_body, fuse5); simpl!(*);