diff --git a/juno_samples/rodinia/bfs/src/bfs.jn b/juno_samples/rodinia/bfs/src/bfs.jn
index 3d0280f1535b35bfd19bb4c1032eb3a224ac5a0d..f82d9d80cf5aa2275e4dcad941bb8128cdf6ee43 100644
--- a/juno_samples/rodinia/bfs/src/bfs.jn
+++ b/juno_samples/rodinia/bfs/src/bfs.jn
@@ -1,4 +1,11 @@
 type Node = struct { edge_start: u32; num_edges: u32; };
+type StopProd = struct { stop: bool; }; // One-field product wrapping the while-loop's termination flag.
+
+fn make_stop_prod() -> StopProd { // Builds a StopProd whose `stop` field is true.
+  let ret : StopProd;
+  ret.stop = true; // Default is "stop"; loop2 in bfs overwrites it when it finds an updated node.
+  return ret;
+}
 
 #[entry]
 fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] {
@@ -23,8 +30,6 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
   let updated: bool[n];
 
   while !stop {
-    stop = true;
-
     @loop1 for i in 0..n {
       if mask[i] {
         mask[i] = false;
@@ -42,15 +47,16 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
       }
     }
 
+    @make let stop_prod = make_stop_prod(); // Re-created on every pass of the while loop, so `stop` starts out true each time.
     @loop2 for i in 0..n {
-      stop = stop && !updated[i];
-    }
-
-    @loop3 for i in 0..n {
-      mask[i] = mask[i] || updated[i];
-      visited[i] = visited[i] || updated[i];
-      updated[i] = false;
+      if updated[i] { // Single pass replacing the former loop2 (stop check) and loop3 (mask/visited update).
+        mask[i] = true; // mask[i] is the flag loop1 tests on the next pass.
+        visited[i] = true;
+        updated[i] = false; // Clear the marker so the next pass starts clean.
+        stop_prod.stop = updated[i]; // updated[i] was cleared on the line above, so this stores false. NOTE(review): presumably an array read rather than a literal on purpose - confirm before simplifying.
+      }
     }
+    stop = stop_prod.stop; // Remains true only when no element of `updated` was set during this pass.
   }
 
   return cost;
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index a33e361db3d4b9634f669226cf5f7198f010869e..589b93b1109b09e9146af593c8649987d6298635 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -12,8 +12,7 @@ phi-elim(bfs);
 no-memset(bfs@cost);
 let init = outline(bfs@cost_init);
 let traverse = outline(bfs@loop1);
-let collect = outline(bfs@loop2 | bfs@loop3);
-parallel-reduce(traverse);
+let collect = outline(bfs@loop2);
 
 simpl!(*);
 predication(*);
@@ -29,12 +28,13 @@ predication(*);
 simpl!(*);
 reduce-slf(*);
 simpl!(*);
+slf(*);
+simpl!(*);
 
 fixpoint {
   forkify(collect);
   fork-guard-elim(collect);
 }
-fork-fusion(collect);
 simpl!(collect);
 
 unforkify(init, traverse, collect);
diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 56489a23ed693014512de67f80c399e1031be7b8..d5c8dee62a5c55f8d8b17a6cedfe58ac16da7dbf 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -11,10 +11,11 @@ macro simpl!(X) {
 phi-elim(bfs);
 no-memset(bfs@cost);
 let init = outline(bfs@cost_init);
-let loop1 = outline(bfs@loop1);
-let loop2 = outline(bfs@loop2);
-let loop3 = outline(bfs@loop3);
-parallel-reduce(loop1);
+let traverse = outline(bfs@loop1);
+let collect = outline(bfs@loop2);
+parallel-reduce(traverse, collect);
+no-memset(make_stop_prod);
+gpu(traverse, make_stop_prod, collect);
 
 simpl!(*);
 predication(*);
@@ -31,41 +32,14 @@ simpl!(*);
 reduce-slf(*);
 simpl!(*);
 
-fork-tile[32, 0, false, true](loop1);
-fork-split(loop1);
-gpu(loop1);
-
 fixpoint {
-  forkify(loop2, loop3);
-  fork-guard-elim(loop2, loop3);
+  forkify(collect);
+  fork-guard-elim(collect);
 }
+simpl!(collect);
 
-simpl!(loop2, loop3);
-fork-tile[32, 0, false, true](loop2, loop3);
-let out = fork-split(loop2, loop3);
-clean-monoid-reduces(loop2, loop3);
-simpl!(loop2, loop3);
-gpu(loop3);
-
-let fission1 = fork-fission[out.bfs_2.fj0](loop2);
-simpl!(loop2);
-fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
-let out = fork-split(fission1.bfs_2.fj_bottom);
-clean-monoid-reduces(loop2);
-simpl!(loop2);
-let fission2 = fork-fission[out.bfs_2.fj0](loop2);
-simpl!(loop2);
-fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
-let out = fork-split(fission2.bfs_2.fj_bottom);
-clean-monoid-reduces(loop2);
-simpl!(loop2);
-let top = outline(fission1.bfs_2.fj_top);
-let middle = outline(fission2.bfs_2.fj_top);
-let bottom = outline(out.bfs_2.fj0);
-const-inline(loop2, top, middle, bottom);
-no-memset(top, middle);
-gpu(top, middle, bottom);
-simpl!(loop2, top, middle, bottom);
+fork-tile[32, 0, false, true](traverse, collect);
+fork-split(traverse, collect);
 
 unforkify(init);
 gcm(*);