diff --git a/juno_samples/matmul/src/gpu_matmul.jn b/juno_samples/matmul/src/gpu_matmul.jn
deleted file mode 100644
index e719ba9bc7a318f264b75cdf08542787c39e38da..0000000000000000000000000000000000000000
--- a/juno_samples/matmul/src/gpu_matmul.jn
+++ /dev/null
@@ -1,45 +0,0 @@
-#[entry]
-fn tiled_64_matmul_with_n_1024<m : usize, l : usize>(a : i32[1024, m], b : i32[m, l]) -> i32 {
-  let res = 0; // scalar accumulator that is returned at the end
-  // Walks a and b in 64x64 tiles; 16 row blocks * 64 rows cover the fixed 1024 rows of a.
-  for bi = 0 to 16 {
-    for bk = 0 to l / 64 {
-      // TODO: make these all the same size (64x64); clone analysis should undo
-      // GVN's combining of these three arrays once they share a type.
-      let atile : i32[66, 64];
-      let btile : i32[65, 64];
-      let ctile : i32[64, 64];
-      // Step through the 64-wide tiles along the shared dimension m.
-      for tile_idx = 0 to m / 64 {
-        for ti = 0 to 64 {
-	  for tk = 0 to 64 {
-	    atile[ti, tk] = a[bi * 64 + ti, tile_idx * 64 + tk];
-	    btile[ti, tk] = b[tile_idx * 64 + ti, bk * 64 + tk];
-	    // TODO: remove setting ctile to zero explicitly; clone analysis
-	    // should see the lack of a phi for ctile in the block loops and
-	    // induce a copy of an initial value of ctile (all zeros) on each
-	    // iteration of the block loops.
-	    ctile[ti, tk] = 0; // NOTE(review): runs once per tile_idx, so earlier tiles' sums are discarded - confirm intended
-	  }
-	}
-        for ti = 0 to 64 { // multiply the loaded atile by btile, accumulating into ctile
-	  for tk = 0 to 64 {
-	    let c_acc = ctile[ti, tk]; // start from the value currently stored in ctile
-	    for inner_idx = 0 to 64 {
-	      c_acc += atile[ti, inner_idx] * btile[inner_idx, tk];
-	    }
-	    ctile[ti, tk] = c_acc;
-	  }
-	}
-      }
-      // Fold every entry of this block's ctile into the scalar result.
-      for ti = 0 to 64 {
-        for tk = 0 to 64 {
-	  res += ctile[ti, tk];
-	}
-      }
-    }
-  }
-
-  return res;
-}