diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 2579e73ec889424f19e7d5ac6940a4566e45634d..d950941a4acba886f47cdd0e99cb3d9a48459636 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -879,8 +879,8 @@ fn spill_clones( || editor.func().nodes[a.idx()].is_reduce()) && !editor.func().nodes[a.idx()] .try_reduce() - .map(|(_, init, _)| { - init == *b + .map(|(_, init, reduct)| { + (init == *b || reduct == *b) && editor.func().schedules[a.idx()].contains(&Schedule::ParallelReduce) }) .unwrap_or(false) diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch index 5be2d903a3d337c941c6193f4c3fc2d5d802f4d5..6c4d027b77df936c5840237c211950d6c0430082 100644 --- a/juno_samples/rodinia/bfs/src/gpu.sch +++ b/juno_samples/rodinia/bfs/src/gpu.sch @@ -27,6 +27,7 @@ simpl!(*); predication(*); simpl!(*); +unforkify(cost_init); parallel-reduce(loop1); forkify(*); fork-guard-elim(*); @@ -35,5 +36,4 @@ predication(*); reduce-slf(*); simpl!(*); -unforkify(cost_init); gcm(*);