diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index 6998f8794c029a0c8d66ec8b557410557483d2ad..c46e4e985449a3fae8aa3041782b02ab8213c7cb 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -1741,6 +1741,7 @@ fn extend_fork(editor: &mut FunctionEditor, fork: NodeID, join: NodeID, multiple control: new_fork, dimension: idx, }); + edit.sub_edit(fork, tid); let old_bound = edit.add_node(Node::DynamicConstant { id: *old_factor }); edit.add_node(Node::Binary { op: BinaryOperator::LT, diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index ada2f552fb1f31412d3f9a0bfe5c27884d4d86e6..32e2b63b55c69e6e2c05cabc3221f3c8feff674b 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -124,6 +124,8 @@ if !feature("seq") { fork-coalesce(fuse4, fuse4_body); simpl!(fuse4, fuse4_body); fuse4 = fuse4_body; +} else { + fork-tile[6, 0, false, true](fuse4@channel_loop); } no-memset(fuse5@res1); diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch index ea6f0403c8f0824c0bcf27dc6dcd15649bcdb2ec..f564cd36571564dfc352315c848c14c36ad5970f 100644 --- a/juno_samples/rodinia/bfs/src/cpu.sch +++ b/juno_samples/rodinia/bfs/src/cpu.sch @@ -41,23 +41,25 @@ parallel-fork(traverse, collect); parallel-reduce(traverse, collect); if !feature("seq") { - fork-tile[32, 0, false, true](traverse, collect); - let (outer, inner) = fork-reshape[[1], [0]](traverse); + fork-tile[32, 0, false, false](traverse, collect); + let (outer, inner) = fork-reshape[[0], [1]](traverse); traverse = outline(inner); - let (outer, inner) = fork-reshape[[1], [0]](collect); + let (outer, inner) = fork-reshape[[0], [1]](collect); collect = outline(inner); - fork-tile[32, 0, false, true](init); - let (outer, inner) = fork-reshape[[1], [0]](init); + fork-tile[32, 0, false, false](init); + let (outer, inner) = fork-reshape[[0], [1]](init); let init_body = outline(inner); inline(bfs@cost_init, bfs@loop1, bfs@loop2); init = init_body; } +fork-tile[8, 0, false, true](init, traverse, collect); delete-uncalled(*); const-inline(*); simpl!(*); +fork-split(init, traverse, collect); unforkify(init, traverse, collect); simpl!(*); -gcm(*); \ No newline at end of file +gcm(*); diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 0253a0210f6cc2451d38399601ad39ff3ab9465a..541d15d7a5b90b17a484c98c2ed216c5912bd666 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -15,7 +15,7 @@ let traverse = outline(bfs@loop1); let collect = outline(bfs@loop2); parallel-reduce(traverse, collect); no-memset(make_stop_prod); -gpu(traverse, make_stop_prod, collect); +gpu(init, traverse, make_stop_prod, collect); simpl!(*); predication(*); @@ -38,12 +38,7 @@ fixpoint { } simpl!(collect); -fork-tile[32, 0, false, true](init); -let (outer, inner) = fork-reshape[[1], [0]](init); -let init_body = outline(inner); +fork-tile[1024, 0, false, true](init, traverse, collect); +fork-split(init, traverse, collect); -fork-tile[1024, 0, false, true](traverse, collect); -fork-split(traverse, collect); - -unforkify(init_body); gcm(*);