Skip to content
Snippets Groups Projects

Some Cava optimization

Merged rarbore2 requested to merge cava_opt into main
Files
2
// Returns the middle element of the `rows` x `cols` matrix `m`: the elements
// are copied into a flat buffer `tmp`, bubble-sorted into ascending order, and
// the entry at index `n / 2` (with n = rows * cols) is returned.
// NOTE(review): this span looks like a merge-request diff view in which the
// pre-change body and the post-change body (the one wrapped in `@median { ... }`)
// are interleaved without +/- markers — confirm against the real file before
// relying on the exact statement order shown here.
fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a {
const n : usize = rows * cols;
let tmp : a[rows * cols];
// Flatten the matrix: flat index i maps to element (i / cols, i % cols).
for i = 0 to rows * cols {
tmp[i] = m[i / cols, i % cols];
}
// Bubble sort: each outer iteration compares adjacent pairs over the first
// n - i - 1 positions and swaps any pair that is out of ascending order.
for i = 0 to n - 1 {
for j = 0 to n - i - 1 {
if tmp[j] > tmp[j+1] {
let t : a = tmp[j];
tmp[j] = tmp[j+1];
tmp[j+1] = t;
// `@median`, `@tmp` and `@medianOuter` label the constructs that follow them;
// presumably they are referenced by a schedule defined elsewhere — confirm.
@median {
const n : usize = rows * cols;
@tmp let tmp : a[rows * cols];
// Same flattening as above: flat index i maps to element (i / cols, i % cols).
for i = 0 to rows * cols {
tmp[i] = m[i / cols, i % cols];
}
// Same bubble sort as above, with the outer loop labeled.
@medianOuter for i = 0 to n - 1 {
for j = 0 to n - i - 1 {
if tmp[j] > tmp[j+1] {
let t : a = tmp[j];
tmp[j] = tmp[j+1];
tmp[j+1] = t;
}
}
}
// After sorting, index n / 2 holds the middle element.
return tmp[n / 2];
}
return tmp[n / 2];
}
// Size of the leading (channel) dimension of the image arrays used by the
// functions in this file, e.g. `u8[CHAN, row, col]` and `f32[CHAN, row, col]`.
const CHAN : u64 = 3;
// Converts an 8-bit image to floating point: every sample of `input` is cast
// to f32, scaled by 1/255 (written as `* 1.0 / 255`), and stored at the same
// [chan, r, c] position of the result.
// NOTE(review): two declarations of `res` are visible (one plain, one labeled
// `@res1`); this looks like before/after lines of a diff view — confirm which
// one the real file contains.
fn scale<row : usize, col : usize>(input : u8[CHAN, row, col]) -> f32[CHAN, row, col] {
let res : f32[CHAN, row, col];
@res1 let res : f32[CHAN, row, col];
// Visit every (channel, row, column) position once.
for chan = 0 to CHAN {
for r = 0 to row {
for c = 0 to col {
res[chan, r, c] = input[chan, r, c] as f32 * 1.0 / 255;
}
}
}
return res;
}
fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, row, col] {
let res : u8[CHAN, row, col];
for chan = 0 to CHAN {
for r = 0 to row {
for c = 0 to col {
res[chan, r, c] = min!::<f32>(max!::<f32>(input[chan, r, c] * 255, 0), 255) as u8;
res[chan, r, c] = input[chan, r, c] as f32 / 255.0;
}
}
}
@@ -50,7 +38,7 @@ fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, ro
}
fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] {
let res : f32[CHAN, row, col];
@res2 let res : f32[CHAN, row, col];
for r = 1 to row-1 {
for c = 1 to col-1 {
@@ -102,13 +90,13 @@ fn demosaic<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN,
}
fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, row, col] {
let res : f32[CHAN, row, col];
@res let res : f32[CHAN, row, col];
for chan = 0 to CHAN {
for r = 0 to row {
for c = 0 to col {
if r >= 1 && r < row - 1 && c >= 1 && c < col - 1 {
let filter : f32[3][3]; // same as [3, 3]
@filter let filter : f32[3][3]; // same as [3, 3]
for i = 0 to 3 by 1 {
for j = 0 to 3 by 1 {
filter[i, j] = input[chan, r + i - 1, c + j - 1];
@@ -129,7 +117,7 @@ fn denoise<row : usize, col : usize>(input : f32[CHAN, row, col]) -> f32[CHAN, r
fn transform<row : usize, col : usize>
(input : f32[CHAN, row, col], tstw_trans : f32[CHAN, CHAN])
-> f32[CHAN, row, col] {
let result : f32[CHAN, row, col];
@res let result : f32[CHAN, row, col];
for chan = 0 to CHAN {
for r = 0 to row {
@@ -152,11 +140,11 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
weights : f32[num_ctrl_pts, CHAN],
coefs : f32[4, CHAN]
) -> f32[CHAN, row, col] {
let result : f32[CHAN, row, col];
let l2_dist : f32[num_ctrl_pts];
@res let result : f32[CHAN, row, col];
for r = 0 to row {
for c = 0 to col {
@l2 let l2_dist : f32[num_ctrl_pts];
for cp = 0 to num_ctrl_pts {
let v1 = input[0, r, c] - ctrl_pts[cp, 0];
let v2 = input[1, r, c] - ctrl_pts[cp, 1];
@@ -164,8 +152,8 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
let v = v1 * v1 + v2 * v2 + v3 * v3;
l2_dist[cp] = sqrt!::<f32>(v);
}
for chan = 0 to CHAN {
@channel_loop for chan = 0 to CHAN {
let chan_val : f32 = 0.0;
for cp = 0 to num_ctrl_pts {
chan_val += l2_dist[cp] * weights[cp, chan];
@@ -184,7 +172,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
fn tone_map<row : usize, col:usize>
(input : f32[CHAN, row, col], tone_map : f32[256, CHAN]) -> f32[CHAN, row, col] {
let result : f32[CHAN, row, col];
@res1 let result : f32[CHAN, row, col];
for chan = 0 to CHAN {
for r = 0 to row {
@@ -198,6 +186,20 @@ fn tone_map<row : usize, col:usize>
return result;
}
// Converts a floating-point image back to 8-bit samples: each value of `input`
// is multiplied by 255, bounded below by 0 and above by 255, then cast to u8
// and stored at the same [chan, r, c] position of the result.
fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, row, col] {
// `@res2` labels the output declaration; presumably referenced by a schedule
// defined elsewhere — confirm.
@res2 let res : u8[CHAN, row, col];
for chan = 0 to CHAN {
for r = 0 to row {
for c = 0 to col {
// Lower bound is applied first (max! with 0), then the upper bound (min! with
// 255), so the value is within [0, 255] before the narrowing cast.
res[chan, r, c] = min!::<f32>(max!::<f32>(input[chan, r, c] * 255, 0), 255) as u8;
}
}
}
return res;
}
#[entry]
fn cava<r, c, num_ctrl_pts : usize>(
input : u8[CHAN, r, c],
@@ -207,11 +209,12 @@ fn cava<r, c, num_ctrl_pts : usize>(
coefs : f32[4, CHAN],
tonemap : f32[256, CHAN],
) -> u8[CHAN, r, c] {
let scaled = scale::<r, c>(input);
let demosc = demosaic::<r, c>(scaled);
let denosd = denoise::<r, c>(demosc);
let transf = transform::<r, c>(denosd, TsTw);
let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs);
let tonemd = tone_map::<r, c>(gamutd, tonemap);
return descale::<r, c>(tonemd);
@fuse1 let scaled = scale::<r, c>(input);
@fuse1 let demosc = demosaic::<r, c>(scaled);
@fuse2 let denosd = denoise::<r, c>(demosc);
@fuse3 let transf = transform::<r, c>(denosd, TsTw);
@fuse4 let gamutd = gamut::<r, c, num_ctrl_pts>(transf, ctrl_pts, weights, coefs);
@fuse5 let tonemd = tone_map::<r, c>(gamutd, tonemap);
@fuse5 let dscald = descale::<r, c>(tonemd);
return dscald;
}
Loading