diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs
index e635b3c00d7bfa0090376d8056e65d8d01e60ce2..ae3dfe22bd73dd1df395f25c45db6b3f27c386a5 100644
--- a/hercules_opt/src/fork_transforms.rs
+++ b/hercules_opt/src/fork_transforms.rs
@@ -695,7 +695,8 @@ pub fn fork_coalesce_helper(
 
         editor.edit(|mut edit| {
             let new_tid = edit.add_node(new_tid);
-            let edit = edit.replace_all_uses(tid, new_tid)?;
+            let mut edit = edit.replace_all_uses(tid, new_tid)?;
+            edit.sub_edit(tid, new_tid);
             Ok(edit)
         });
     }
@@ -1598,3 +1599,8 @@ pub fn clean_monoid_reduces(editor: &mut FunctionEditor, typing: &Vec<TypeID>) {
         }
     }
 }
+
+/*
+ * Looks for reads in fork-joins that are linear in the thread IDs for the
+ * fork-join.
+ */
diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn
index 3b7c783308cd1949651afb1a7f0cdd708f3f71d7..334fc2bfe4f745cec9004fdf9ebdf80d11818c0f 100644
--- a/juno_samples/fork_join_tests/src/fork_join_tests.jn
+++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn
@@ -124,7 +124,8 @@ fn test8(input : i32) -> i32[8] {
 }
 
 #[entry]
-fn test9<r, c, z : usize>(input : i32[r, c]) -> i32[r, c] {
+fn test9<r, c : usize>(input : i32[r, c]) -> i32[r, c] {
+  const z = 3;
   const rad = z / 2;
   @const let out : i32[r, c];
 
diff --git a/juno_samples/fork_join_tests/src/gpu.sch b/juno_samples/fork_join_tests/src/gpu.sch
index c554fd502292c040a405e0ffafb548844d0f1d5f..ca17f69223ae5c39e375f9f47b9badbf195cf0aa 100644
--- a/juno_samples/fork_join_tests/src/gpu.sch
+++ b/juno_samples/fork_join_tests/src/gpu.sch
@@ -73,6 +73,24 @@ dce(auto.test8);
 simplify-cfg(auto.test8);
 dce(auto.test8);
 
+fork-split(auto.test9@filter_loop);
+fork-unroll(auto.test9);
+fork-unroll(auto.test9);
+dce(auto.test9);
+ccp(auto.test9);
+gvn(auto.test9);
+phi-elim(auto.test9);
+dce(auto.test9);
+fixpoint {
+  predication(auto.test9);
+  simplify-cfg(auto.test9);
+}
+ccp(auto.test9);
+gvn(auto.test9);
+phi-elim(auto.test9);
+lift-dc-math(auto.test9);
+dce(auto.test9);
+
 ip-sroa(*);
 sroa(*);
 dce(*);
@@ -80,7 +98,7 @@ ccp(*);
 gvn(*);
 phi-elim(*);
 dce(*);
-gcm(*);
 
+gcm(*);
 float-collections(test2, auto.test2, test4, auto.test4, test5, auto.test5);
 gcm(*);
diff --git a/juno_samples/fork_join_tests/src/main.rs b/juno_samples/fork_join_tests/src/main.rs
index fa99f759ca08814215d1b138758f833f83c38c5f..e66309b22b0650feaab315829dd412f6275e9a99 100644
--- a/juno_samples/fork_join_tests/src/main.rs
+++ b/juno_samples/fork_join_tests/src/main.rs
@@ -61,7 +61,7 @@ fn main() {
         let mut r = runner!(test9);
         let input = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
         let input = HerculesImmBox::from(&input as &[i32]);
-        let output = r.run(3, 3, 3, input.to()).await;
+        let output = r.run(3, 3, input.to()).await;
         let correct = vec![
             1 + 2 + 4 + 5,
             1 + 2 + 3 + 4 + 5 + 6,