Skip to content
Snippets Groups Projects
Commit e37ad2b4 authored by rarbore2
Browse files

some tuning

parent 9c7a2c20
No related branches found
No related tags found
1 merge request: !210 "A whole bunch of optimization"
Pipeline #201956 passed
......@@ -145,7 +145,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
@image_loop for r = 0 to row {
for c = 0 to col {
@l2 let l2_dist : f32[num_ctrl_pts];
for cp = 0 to num_ctrl_pts {
@cp_loop for cp = 0 to num_ctrl_pts {
let v1 = input[0, r, c] - ctrl_pts[cp, 0];
let v2 = input[1, r, c] - ctrl_pts[cp, 1];
let v3 = input[2, r, c] - ctrl_pts[cp, 2];
......@@ -155,7 +155,7 @@ fn gamut<row : usize, col : usize, num_ctrl_pts : usize>(
@channel_loop for chan = 0 to CHAN {
let chan_val : f32 = 0.0;
for cp = 0 to num_ctrl_pts {
@cp_loop for cp = 0 to num_ctrl_pts {
chan_val += l2_dist[cp] * weights[cp, chan];
}
......
......@@ -115,7 +115,7 @@ array-slf(fuse4);
simpl!(fuse4);
let par = fuse4@image_loop \ fuse4@channel_loop;
fork-tile[4, 1, false, false](par);
fork-tile[4, 0, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let fuse4_body = outline(split.cava_3.fj2);
......
......@@ -117,9 +117,9 @@ fixpoint {
simpl!(fuse4);
array-slf(fuse4);
simpl!(fuse4);
//fork-tile[2, 0, false, true](fuse4@channel_loop);
//fork-split(fuse4@channel_loop);
//clean-monoid-reduces(fuse4);
fork-tile[2, 0, false, true](fuse4@channel_loop);
let out = fork-split(fuse4@channel_loop);
fork-unroll(out.cava_3.fj1);
unforkify(fuse4@channel_loop);
no-memset(fuse5@res1);
......
......@@ -26,7 +26,7 @@ predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[4, 0, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
......@@ -42,7 +42,7 @@ fixpoint {
simpl!(laplacian_estimate);
let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[4, 0, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._1_laplacian_estimate.fj2 | laplacian_estimate.shr1 | laplacian_estimate.shr2;
......@@ -59,7 +59,7 @@ fixpoint {
simpl!(zero_crossings);
let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[4, 0, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._2_zero_crossings.fj2 | zero_crossings.shr1 | zero_crossings.shr2;
......@@ -86,7 +86,7 @@ fixpoint {
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[16, 0, false, false](max_gradient);
fork-tile[32, 0, false, false](max_gradient);
let split = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
let out = outline(split._4_max_gradient.fj1);
......@@ -105,7 +105,7 @@ fixpoint {
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);
fork-tile[4, 1, false, false](reject_zero_crossings);
fork-tile[4, 0, false, false](reject_zero_crossings);
fork-tile[8, 0, false, false](reject_zero_crossings);
fork-interchange[1, 2](reject_zero_crossings);
let split = fork-split(reject_zero_crossings);
let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment