diff --git a/juno_samples/fork_join_tests/src/cpu.sch b/juno_samples/fork_join_tests/src/cpu.sch
index f46c91d6a84a08b2258332af1dc5d6a662d86639..5f3ff94e9a995b1ff2f47cb4fcb4d499896189f0 100644
--- a/juno_samples/fork_join_tests/src/cpu.sch
+++ b/juno_samples/fork_join_tests/src/cpu.sch
@@ -3,7 +3,7 @@ gvn(*);
 phi-elim(*);
 dce(*);
 
-let auto = auto-outline(test1, test2, test3, test4, test5, test7, test8, test9);
+let auto = auto-outline(test1, test2, test3, test4, test5, test7, test8, test9, test10);
 cpu(auto.test1);
 cpu(auto.test2);
 cpu(auto.test3);
@@ -12,6 +12,7 @@ cpu(auto.test5);
 cpu(auto.test7);
 cpu(auto.test8);
 cpu(auto.test9);
+cpu(auto.test10);
 
 let test1_cpu = auto.test1;
 rename["test1_cpu"](test1_cpu);
@@ -94,6 +95,11 @@ dce(auto.test8);
 simplify-cfg(auto.test8);
 dce(auto.test8);
 
-no-memset(test9@const);
+array-slf(auto.test10);
+ccp(auto.test10);
+dce(auto.test10);
+simplify-cfg(auto.test10);
+dce(auto.test10);
+unforkify(auto.test10);
 
 gcm(*);
diff --git a/juno_samples/fork_join_tests/src/fork_join_tests.jn b/juno_samples/fork_join_tests/src/fork_join_tests.jn
index 334fc2bfe4f745cec9004fdf9ebdf80d11818c0f..2eab56b9400a2695548547b56d52a87eb0751771 100644
--- a/juno_samples/fork_join_tests/src/fork_join_tests.jn
+++ b/juno_samples/fork_join_tests/src/fork_join_tests.jn
@@ -147,3 +147,16 @@ fn test9<r, c : usize>(input : i32[r, c]) -> i32[r, c] {
 
   return out;
 }
+
+#[entry]
+fn test10(k1 : i32[8], k2 : i32[8], v : i32[8]) -> i32 { // double gather: returns the sum over i of v[k1[k2[i]]]
+  @const let s : i32[8]; // scratch array; the `const` label is what gpu.sch targets as test10@const
+  for i = 0 to 8 { // first gather: fill every slot of s from v, indexed through k1
+    s[i] = v[k1[i] as u64]; // assumes k1[i] is a valid index into v -- no check here
+  }
+  let sum = 0;
+  for i = 0 to 8 { // second gather: accumulate s read through k2
+    sum += s[k2[i] as u64]; // assumes k2[i] is a valid index into s -- no check here
+  }
+  return sum;
+}
\ No newline at end of file
diff --git a/juno_samples/fork_join_tests/src/gpu.sch b/juno_samples/fork_join_tests/src/gpu.sch
index 81dc8d9854776931f4598a9010837008796baaf8..43b28e34e74f51da8d0df12901be065bbc8a9a5a 100644
--- a/juno_samples/fork_join_tests/src/gpu.sch
+++ b/juno_samples/fork_join_tests/src/gpu.sch
@@ -8,12 +8,13 @@ no-memset(test6@const);
 no-memset(test8@const1);
 no-memset(test8@const2);
 no-memset(test9@const);
+no-memset(test10@const);
 
 gvn(*);
 phi-elim(*);
 dce(*);
 
-let auto = auto-outline(test1, test2, test3, test4, test5, test7, test8, test9);
+let auto = auto-outline(test1, test2, test3, test4, test5, test7, test8, test9, test10);
 gpu(auto.test1);
 gpu(auto.test2);
 gpu(auto.test3);
@@ -22,6 +23,7 @@ gpu(auto.test5);
 gpu(auto.test7);
 gpu(auto.test8);
 gpu(auto.test9);
+gpu(auto.test10);
 
 ip-sroa(*);
 sroa(*);
diff --git a/juno_samples/fork_join_tests/src/main.rs b/juno_samples/fork_join_tests/src/main.rs
index e66309b22b0650feaab315829dd412f6275e9a99..0b37a99d6f14f17238391ca25dfccbfc9a753441 100644
--- a/juno_samples/fork_join_tests/src/main.rs
+++ b/juno_samples/fork_join_tests/src/main.rs
@@ -74,6 +74,20 @@ fn main() {
             5 + 6 + 8 + 9,
         ];
         assert(&correct, output);
+
+        let mut r = runner!(test10);
+        let k1 = vec![0, 4, 3, 7, 3, 4, 2, 1];
+        let k2 = vec![6, 4, 3, 2, 4, 1, 0, 5];
+        let v = vec![3, -499, 4, 32, -2, 55, -74, 10];
+        let mut correct = 0;
+        for i in 0..8 {
+            correct += v[k1[k2[i] as usize] as usize];
+        }
+        let k1 = HerculesImmBox::from(&k1 as &[i32]);
+        let k2 = HerculesImmBox::from(&k2 as &[i32]);
+        let v = HerculesImmBox::from(&v as &[i32]);
+        let output = r.run(k1.to(), k2.to(), v.to()).await;
+        assert_eq!(output, correct);
     });
 }