Skip to content
Snippets Groups Projects
Commit 4cd1d610 authored by Russel Arbore's avatar Russel Arbore
Browse files

opt bfs

parent 375199f5
No related branches found
No related tags found
2 merge requests: !215 Large benches, !214 More optimizations
Pipeline #202019 passed
// Per-node record with two u32 fields, `edge_start` and `num_edges`.
// bfs receives an array of these (`graph_nodes: Node[n]`) next to `edges: u32[m]`.
// NOTE(review): presumably the two fields delimit this node's range inside
// `edges`; the code that reads them is not visible here -- confirm.
type Node = struct { edge_start: u32; num_edges: u32; };
// Single-field struct wrapping the boolean `stop` flag. bfs creates one via
// make_stop_prod(), writes `stop_prod.stop` inside a loop, and then copies
// the field back into its `stop` variable.
type StopProd = struct { stop: bool; };
// Returns a StopProd whose `stop` field is set to true.
// bfs calls this (labelled @make) inside its `while !stop` loop to obtain
// the value it later updates through `stop_prod.stop`.
fn make_stop_prod() -> StopProd {
let ret : StopProd;
// Start in the "stop" state; the caller overwrites the field afterwards.
ret.stop = true;
return ret;
}
#[entry]
fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n] {
......@@ -23,8 +30,6 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
let updated: bool[n];
while !stop {
stop = true;
@loop1 for i in 0..n {
if mask[i] {
mask[i] = false;
......@@ -42,15 +47,16 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
}
}
@make let stop_prod = make_stop_prod();
@loop2 for i in 0..n {
stop = stop && !updated[i];
}
@loop3 for i in 0..n {
mask[i] = mask[i] || updated[i];
visited[i] = visited[i] || updated[i];
updated[i] = false;
if updated[i] {
mask[i] = true;
visited[i] = true;
updated[i] = false;
stop_prod.stop = updated[i];
}
}
stop = stop_prod.stop;
}
return cost;
......
......@@ -12,8 +12,7 @@ phi-elim(bfs);
no-memset(bfs@cost);
let init = outline(bfs@cost_init);
let traverse = outline(bfs@loop1);
let collect = outline(bfs@loop2 | bfs@loop3);
parallel-reduce(traverse);
let collect = outline(bfs@loop2);
simpl!(*);
predication(*);
......@@ -29,12 +28,13 @@ predication(*);
simpl!(*);
reduce-slf(*);
simpl!(*);
slf(*);
simpl!(*);
fixpoint {
forkify(collect);
fork-guard-elim(collect);
}
fork-fusion(collect);
simpl!(collect);
unforkify(init, traverse, collect);
......
......@@ -11,10 +11,11 @@ macro simpl!(X) {
phi-elim(bfs);
no-memset(bfs@cost);
let init = outline(bfs@cost_init);
let loop1 = outline(bfs@loop1);
let loop2 = outline(bfs@loop2);
let loop3 = outline(bfs@loop3);
parallel-reduce(loop1);
let traverse = outline(bfs@loop1);
let collect = outline(bfs@loop2);
parallel-reduce(traverse, collect);
no-memset(make_stop_prod);
gpu(traverse, make_stop_prod, collect);
simpl!(*);
predication(*);
......@@ -31,41 +32,14 @@ simpl!(*);
reduce-slf(*);
simpl!(*);
fork-tile[32, 0, false, true](loop1);
fork-split(loop1);
gpu(loop1);
fixpoint {
forkify(loop2, loop3);
fork-guard-elim(loop2, loop3);
forkify(collect);
fork-guard-elim(collect);
}
simpl!(collect);
simpl!(loop2, loop3);
fork-tile[32, 0, false, true](loop2, loop3);
let out = fork-split(loop2, loop3);
clean-monoid-reduces(loop2, loop3);
simpl!(loop2, loop3);
gpu(loop3);
let fission1 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission1.bfs_2.fj_bottom);
let out = fork-split(fission1.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
let fission2 = fork-fission[out.bfs_2.fj0](loop2);
simpl!(loop2);
fork-tile[32, 0, false, true](fission2.bfs_2.fj_bottom);
let out = fork-split(fission2.bfs_2.fj_bottom);
clean-monoid-reduces(loop2);
simpl!(loop2);
let top = outline(fission1.bfs_2.fj_top);
let middle = outline(fission2.bfs_2.fj_top);
let bottom = outline(out.bfs_2.fj0);
const-inline(loop2, top, middle, bottom);
no-memset(top, middle);
gpu(top, middle, bottom);
simpl!(loop2, top, middle, bottom);
fork-tile[32, 0, false, true](traverse, collect);
fork-split(traverse, collect);
unforkify(init);
gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment