Skip to content
Snippets Groups Projects
Commit 4cd1d610 authored by Russel Arbore
Browse files

opt bfs

parent 375199f5
No related branches found
No related tags found
2 merge requests: !215 "Large benches", !214 "More optimizations"
Pipeline #202019 passed
type Node = struct { edge_start: u32; num_edges: u32; }; type Node = struct { edge_start: u32; num_edges: u32; };
// Struct with a single boolean field, `stop`. Values of this type are
// created by make_stop_prod().
type StopProd = struct { stop: bool; };
// Returns a StopProd whose `stop` field is set to true.
// Takes no arguments.
fn make_stop_prod() -> StopProd {
let ret : StopProd;
// Start from "stop"; a caller can overwrite the field afterwards.
ret.stop = true;
return ret;
}
#[entry] #[entry]
fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] { fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] {
...@@ -23,8 +30,6 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] ...@@ -23,8 +30,6 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
let updated: bool[n]; let updated: bool[n];
while !stop { while !stop {
stop = true;
@loop1 for i in 0..n { @loop1 for i in 0..n {
if mask[i] { if mask[i] {
mask[i] = false; mask[i] = false;
...@@ -42,15 +47,16 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] ...@@ -42,15 +47,16 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
} }
} }
@make let stop_prod = make_stop_prod();
@loop2 for i in 0..n { @loop2 for i in 0..n {
stop = stop && !updated[i]; if updated[i] {
} mask[i] = true;
visited[i] = true;
@loop3 for i in 0..n { updated[i] = false;
mask[i] = mask[i] || updated[i]; stop_prod.stop = updated[i];
visited[i] = visited[i] || updated[i]; }
updated[i] = false;
} }
stop = stop_prod.stop;
} }
return cost; return cost;
......
...@@ -12,8 +12,7 @@ phi-elim(bfs); ...@@ -12,8 +12,7 @@ phi-elim(bfs);
no-memset(bfs@cost); no-memset(bfs@cost);
let init = outline(bfs@cost_init); let init = outline(bfs@cost_init);
let traverse = outline(bfs@loop1); let traverse = outline(bfs@loop1);
let collect = outline(bfs@loop2 | bfs@loop3); let collect = outline(bfs@loop2);
parallel-reduce(traverse);
simpl!(*); simpl!(*);
predication(*); predication(*);
...@@ -29,12 +28,13 @@ predication(*); ...@@ -29,12 +28,13 @@ predication(*);
simpl!(*); simpl!(*);
reduce-slf(*); reduce-slf(*);
simpl!(*); simpl!(*);
slf(*);
simpl!(*);
fixpoint { fixpoint {
forkify(collect); forkify(collect);
fork-guard-elim(collect); fork-guard-elim(collect);
} }
fork-fusion(collect);
simpl!(collect); simpl!(collect);
unforkify(init, traverse, collect); unforkify(init, traverse, collect);
......
...@@ -11,10 +11,11 @@ macro simpl!(X) { ...@@ -11,10 +11,11 @@ macro simpl!(X) {
phi-elim(bfs); phi-elim(bfs);
no-memset(bfs@cost); no-memset(bfs@cost);
let init = outline(bfs@cost_init); let init = outline(bfs@cost_init);
let loop1 = outline(bfs@loop1); let traverse = outline(bfs@loop1);
let loop2 = outline(bfs@loop2); let collect = outline(bfs@loop2);
let loop3 = outline(bfs@loop3); parallel-reduce(traverse, collect);
parallel-reduce(loop1); no-memset(make_stop_prod);
gpu(traverse, make_stop_prod, collect);
simpl!(*); simpl!(*);
predication(*); predication(*);
...@@ -31,41 +32,14 @@ simpl!(*); ...@@ -31,41 +32,14 @@ simpl!(*);
reduce-slf(*); reduce-slf(*);
simpl!(*); simpl!(*);
fork-tile[32, 0, false, true](loop1);
fork-split(loop1);
gpu(loop1);
fixpoint { fixpoint {
forkify(loop2, loop3); forkify(collect);
fork-guard-elim(loop2, loop3); fork-guard-elim(collect);
} }
simpl!(collect);
simpl!(loop2, loop3); fork-tile[32, 0, false, true](traverse, collect);
fork-tile[32, 0, false, true](loop2, loop3); fork-split(traverse, collect);
let out = fork-split(loop2, loop3);
clean-monoid-reduces(loop2, loop3);
simpl!(loop2, loop3);
gpu(loop3);
let fission1 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
let out = fork-split(fission1.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
let fission2 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
let out = fork-split(fission2.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
let top = outline(fission1.bfs_2.fj_top);
let middle = outline(fission2.bfs_2.fj_top);
let bottom = outline(out.bfs_2.fj0);
const-inline(loop2, top, middle, bottom);
no-memset(top, middle);
gpu(top, middle, bottom);
simpl!(loop2, top, middle, bottom);
unforkify(init); unforkify(init);
gcm(*); gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment