diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs
index 2579e73ec889424f19e7d5ac6940a4566e45634d..d950941a4acba886f47cdd0e99cb3d9a48459636 100644
--- a/hercules_opt/src/gcm.rs
+++ b/hercules_opt/src/gcm.rs
@@ -879,8 +879,8 @@ fn spill_clones(
                     || editor.func().nodes[a.idx()].is_reduce())
                 && !editor.func().nodes[a.idx()]
                     .try_reduce()
-                    .map(|(_, init, _)| {
-                        init == *b
+                    .map(|(_, init, reduct)| {
+                        (init == *b || reduct == *b)
                             && editor.func().schedules[a.idx()].contains(&Schedule::ParallelReduce)
                     })
                     .unwrap_or(false)
diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 5be2d903a3d337c941c6193f4c3fc2d5d802f4d5..6c4d027b77df936c5840237c211950d6c0430082 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -27,6 +27,7 @@ simpl!(*);
 predication(*);
 simpl!(*);
 
+unforkify(cost_init);
 parallel-reduce(loop1);
 forkify(*);
 fork-guard-elim(*);
@@ -35,5 +36,4 @@ predication(*);
 reduce-slf(*);
 simpl!(*);
 
-unforkify(cost_init);
 gcm(*);