diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index ae67fdd987e961a95311a7d3aaa0f94fe31f1687..f94c473fb83dd9c62486de97bb9e8f80a0a4f221 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -10,13 +10,15 @@ macro simpl!(X) {
 
 phi-elim(bfs);
 no-memset(bfs@cost);
-outline(bfs@cost_init);
+let init = outline(bfs@cost_init);
 let loop1 = outline(bfs@loop1);
 let loop2 = outline(bfs@loop2);
+parallel-reduce(loop1);
 
 simpl!(*);
 predication(*);
 const-inline(*);
+loop-bound-canon(*);
 simpl!(*);
 fixpoint {
   forkify(*);
@@ -26,5 +28,12 @@ simpl!(*);
 predication(*);
 simpl!(*);
 
-unforkify(*);
+fork-tile[32, 0, false, false](loop1);
+let split = fork-split(loop1);
+let out = outline(split.bfs_1.fj1);
+unforkify(out);
+inline(bfs@loop1);
+delete-uncalled(*);
+
+unforkify(init);
 gcm(*);
diff --git a/juno_samples/rodinia/bfs/src/lib.rs b/juno_samples/rodinia/bfs/src/lib.rs
index 218e9bb0ffd73a2fb42b21a1fcc12fcc2cb6bb68..f64035409552c5b223b54bc0be8e8bf512698d30 100644
--- a/juno_samples/rodinia/bfs/src/lib.rs
+++ b/juno_samples/rodinia/bfs/src/lib.rs
@@ -19,6 +19,7 @@ pub struct BFSInputs {
 fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> {
     let n = nodes.len() as u64;
     let m = edges.len() as u64;
+    println!("Running with {} nodes and {} edges.", n, m);
 
     let nodes = HerculesImmBox::from(nodes);
     let edges = HerculesImmBox::from(edges);