Skip to content
Snippets Groups Projects
Commit 5ba0636b authored by Russel Arbore
Browse files

I am surprised these forkify

parent 8e8ed5ef
No related branches found
No related tags found
1 merge request: !178 "Some Cava optimization"
Pipeline #201632 passed
This commit is part of merge request !178. Comments created here will be created in the context of that merge request.
...@@ -22,7 +22,7 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a { ...@@ -22,7 +22,7 @@ fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a {
const CHAN : u64 = 3; const CHAN : u64 = 3;
fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] { fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] {
@const let res : f32[CHAN, row, col]; @res1 let res : f32[CHAN, row, col];
for chan = 0 to CHAN { for chan = 0 to CHAN {
for r = 0 to row { for r = 0 to row {
...@@ -50,7 +50,7 @@ fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, ro ...@@ -50,7 +50,7 @@ fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, ro
} }
fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] { fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] {
let res : f32[CHAN, row, col]; @res2 let res : f32[CHAN, row, col];
for r = 1 to row-1 { for r = 1 to row-1 {
for c = 1 to col-1 { for c = 1 to col-1 {
...@@ -129,7 +129,7 @@ fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, r ...@@ -129,7 +129,7 @@ fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, r
fn transform<row : usize, col : usize> fn transform<row : usize, col : usize>
(input : f32[CHAN, row, col], tstw_trans : f32[CHAN, CHAN]) (input : f32[CHAN, row, col], tstw_trans : f32[CHAN, CHAN])
-> f32[CHAN, row, col] { -> f32[CHAN, row, col] {
let result : f32[CHAN, row, col]; @res let result : f32[CHAN, row, col];
for chan = 0 to CHAN { for chan = 0 to CHAN {
for r = 0 to row { for r = 0 to row {
...@@ -152,11 +152,11 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>( ...@@ -152,11 +152,11 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
weights : f32[num_ctrl_pts, CHAN], weights : f32[num_ctrl_pts, CHAN],
coefs : f32[4, CHAN] coefs : f32[4, CHAN]
) -> f32[CHAN, row, col] { ) -> f32[CHAN, row, col] {
let result : f32[CHAN, row, col]; @res let result : f32[CHAN, row, col];
let l2_dist : f32[num_ctrl_pts];
for r = 0 to row { for r = 0 to row {
for c = 0 to col { for c = 0 to col {
@l2 let l2_dist : f32[num_ctrl_pts];
for cp = 0 to num_ctrl_pts { for cp = 0 to num_ctrl_pts {
let v1 = input[0, r, c] - ctrl_pts[cp, 0]; let v1 = input[0, r, c] - ctrl_pts[cp, 0];
let v2 = input[1, r, c] - ctrl_pts[cp, 1]; let v2 = input[1, r, c] - ctrl_pts[cp, 1];
...@@ -210,8 +210,8 @@ fn cava<r, c, num_ctrl_pts : usize>( ...@@ -210,8 +210,8 @@ fn cava<r, c, num_ctrl_pts : usize>(
@fuse1 let scaled = scale::<r, c>(input); @fuse1 let scaled = scale::<r, c>(input);
@fuse1 let demosc = demosaic::<r, c>(scaled); @fuse1 let demosc = demosaic::<r, c>(scaled);
@fuse2 let denosd = denoise::<r, c>(demosc); @fuse2 let denosd = denoise::<r, c>(demosc);
let transf = transform::<r, c>(denosd, TsTw); @fuse3 let transf = transform::<r, c>(denosd, TsTw);
let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs); @fuse4 let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs);
let tonemd = tone_map::<r, c>(gamutd, tonemap); let tonemd = tone_map::<r, c>(gamutd, tonemap);
let dscald = descale::<r, c>(tonemd); let dscald = descale::<r, c>(tonemd);
return dscald; return dscald;
......
...@@ -16,11 +16,18 @@ inline(fuse1); ...@@ -16,11 +16,18 @@ inline(fuse1);
let fuse2 = outline(cava@fuse2); let fuse2 = outline(cava@fuse2);
inline(fuse2); inline(fuse2);
let fuse3 = outline(cava@fuse3);
inline(fuse3);
let fuse4 = outline(cava@fuse4);
inline(fuse4);
ip-sroa(*); ip-sroa(*);
sroa(*); sroa(*);
simpl!(*); simpl!(*);
no-memset(fuse1@const); no-memset(fuse1@res1);
no-memset(fuse1@res2);
fixpoint { fixpoint {
forkify(fuse1); forkify(fuse1);
fork-guard-elim(fuse1); fork-guard-elim(fuse1);
...@@ -45,6 +52,24 @@ array-slf(fuse2); ...@@ -45,6 +52,24 @@ array-slf(fuse2);
simpl!(fuse2); simpl!(fuse2);
unforkify(fuse2); unforkify(fuse2);
delete-uncalled(*); no-memset(fuse3@res);
fixpoint {
forkify(fuse3);
fork-guard-elim(fuse3);
fork-coalesce(fuse3);
}
fork-split(fuse3);
unforkify(fuse3);
no-memset(fuse4@res);
no-memset(fuse4@l2);
fixpoint {
forkify(fuse4);
fork-guard-elim(fuse4);
fork-coalesce(fuse4);
}
fork-split(fuse4);
unforkify(fuse4);
delete-uncalled(*);
gcm(*); gcm(*);
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment