Skip to content
Snippets Groups Projects
Commit 51e246a5 authored by Russel Arbore
Browse files

gpu schedule

parent 49a0d4c7
No related branches found
No related tags found
1 merge request: !190 "Set up cava benchmark"
Pipeline #201753 passed
This commit is part of merge request !190. Comments created here will be created in the context of that merge request.
...@@ -40,7 +40,7 @@ fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, ...@@ -40,7 +40,7 @@ fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row,
fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] {
@res2 let res : f32[CHAN, row, col]; @res2 let res : f32[CHAN, row, col];
for r = 1 to row-1 { @loop for r = 1 to row-1 {
for c = 1 to col-1 { for c = 1 to col-1 {
if r % 2 == 0 && c % 2 == 0 { if r % 2 == 0 && c % 2 == 0 {
let R1 = input[0, r, c-1]; let R1 = input[0, r, c-1];
......
...@@ -46,6 +46,9 @@ fixpoint { ...@@ -46,6 +46,9 @@ fixpoint {
} }
predication(fuse1); predication(fuse1);
simpl!(fuse1); simpl!(fuse1);
write-predication(fuse1);
simpl!(fuse1);
parallel-reduce(fuse1@loop);
inline(fuse2); inline(fuse2);
no-memset(fuse2@res); no-memset(fuse2@res);
......
gvn(*); macro simpl!(X) {
phi-elim(*); ccp(X);
dce(*); simplify-cfg(X);
lift-dc-math(X);
gvn(X);
phi-elim(X);
dce(X);
infer-schedules(X);
}
simpl!(*);
let fuse1 = outline(cava@fuse1);
inline(fuse1);
gpu(fuse1);
let fuse2 = outline(cava@fuse2);
inline(fuse2);
gpu(fuse2);
let fuse3 = outline(cava@fuse3);
inline(fuse3);
gpu(fuse3);
inline(denoise); let fuse4 = outline(cava@fuse4);
gpu(scale, demosaic, denoise, transform, gamut, tone_map, descale); inline(fuse4);
gpu(fuse4);
let fuse5 = outline(cava@fuse5);
inline(fuse5);
gpu(fuse5);
ip-sroa(*); ip-sroa(*);
sroa(*); sroa(*);
dce(*); simpl!(*);
gvn(*);
phi-elim(*);
dce(*);
// forkify(*); no-memset(fuse1@res1);
infer-schedules(*); no-memset(fuse1@res2);
fixpoint {
forkify(fuse1);
fork-guard-elim(fuse1);
fork-coalesce(fuse1);
}
simpl!(fuse1);
array-slf(fuse1);
loop-bound-canon(fuse1);
fixpoint {
forkify(fuse1);
fork-guard-elim(fuse1);
fork-coalesce(fuse1);
}
predication(fuse1);
simpl!(fuse1);
write-predication(fuse1);
simpl!(fuse1);
parallel-reduce(fuse1@loop);
gcm(*); inline(fuse2);
no-memset(fuse2@res);
no-memset(fuse2@filter);
no-memset(fuse2@tmp);
fixpoint {
forkify(fuse2);
fork-guard-elim(fuse2);
fork-coalesce(fuse2);
}
simpl!(fuse2);
predication(fuse2);
simpl!(fuse2);
let median = outline(fuse2@median);
fork-unroll(median@medianOuter);
simpl!(median);
fixpoint {
forkify(median);
fork-guard-elim(median);
}
simpl!(median);
fixpoint {
fork-unroll(median);
}
ccp(median);
array-to-product(median);
sroa(median);
phi-elim(median);
predication(median);
simpl!(median);
inline(fuse2);
ip-sroa(*);
sroa(*);
array-slf(fuse2);
write-predication(fuse2);
simpl!(fuse2);
no-memset(fuse3@res);
fixpoint { fixpoint {
float-collections(*); forkify(fuse3);
dce(*); fork-guard-elim(fuse3);
gcm(*); fork-coalesce(fuse3);
} }
simpl!(fuse3);
no-memset(fuse4@res);
no-memset(fuse4@l2);
fixpoint {
forkify(fuse4);
fork-guard-elim(fuse4);
fork-coalesce(fuse4);
}
simpl!(fuse4);
fork-unroll(fuse4@channel_loop);
simpl!(fuse4);
fixpoint {
fork-fusion(fuse4@channel_loop);
}
simpl!(fuse4);
array-slf(fuse4);
simpl!(fuse4);
no-memset(fuse5@res1);
no-memset(fuse5@res2);
fixpoint {
forkify(fuse5);
fork-guard-elim(fuse5);
fork-coalesce(fuse5);
}
simpl!(fuse5);
array-slf(fuse5);
simpl!(fuse5);
delete-uncalled(*);
simpl!(*);
delete-uncalled(*);
gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment