Skip to content
Snippets Groups Projects
Commit 5f96fddc authored by Russel Arbore
Browse files

tweak

parent b1f50f47
No related branches found
No related tags found
1 merge request: !212 "More optimization"
Pipeline #201997 passed
...@@ -822,7 +822,7 @@ fn typeflow( ...@@ -822,7 +822,7 @@ fn typeflow(
// We also return the return type from here // We also return the return type from here
match intrinsic { match intrinsic {
// Intrinsics that take any numeric type and return the same // Intrinsics that take any numeric type and return the same
Intrinsic::Abs => { Intrinsic::Abs | Intrinsic::Max | Intrinsic::Min => {
if let Concrete(id) = inputs[0] { if let Concrete(id) = inputs[0] {
if types[id.idx()].is_arithmetic() { if types[id.idx()].is_arithmetic() {
Concrete(*id) Concrete(*id)
...@@ -856,8 +856,6 @@ fn typeflow( ...@@ -856,8 +856,6 @@ fn typeflow(
| Intrinsic::Ln1P | Intrinsic::Ln1P
| Intrinsic::Log10 | Intrinsic::Log10
| Intrinsic::Log2 | Intrinsic::Log2
| Intrinsic::Max
| Intrinsic::Min
| Intrinsic::Round | Intrinsic::Round
| Intrinsic::Sin | Intrinsic::Sin
| Intrinsic::Sinh | Intrinsic::Sinh
......
...@@ -24,19 +24,6 @@ fn srad_bench(c: &mut Criterion) { ...@@ -24,19 +24,6 @@ fn srad_bench(c: &mut Criterion) {
} = read_graphics(image); } = read_graphics(image);
let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols);
let mut image_h = HerculesMutBox::from(image.clone()); let mut image_h = HerculesMutBox::from(image.clone());
let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>();
let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>();
let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>();
let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>();
// Fix boundary conditions
iN[0] = 0;
iS[nrows - 1] = (nrows - 1) as i32;
jW[0] = 0;
jE[ncols - 1] = (ncols - 1) as i32;
let iN_h = HerculesImmBox::from(iN.as_slice());
let iS_h = HerculesImmBox::from(iS.as_slice());
let jW_h = HerculesImmBox::from(jW.as_slice());
let jE_h = HerculesImmBox::from(jE.as_slice());
group.bench_function("srad bench", |b| { group.bench_function("srad bench", |b| {
b.iter(|| { b.iter(|| {
async_std::task::block_on(async { async_std::task::block_on(async {
...@@ -45,10 +32,6 @@ fn srad_bench(c: &mut Criterion) { ...@@ -45,10 +32,6 @@ fn srad_bench(c: &mut Criterion) {
ncols as u64, ncols as u64,
niter as u64, niter as u64,
image_h.to(), image_h.to(),
iN_h.to(),
iS_h.to(),
jW_h.to(),
jE_h.to(),
max, max,
lambda, lambda,
) )
......
...@@ -32,7 +32,7 @@ simpl!(*); ...@@ -32,7 +32,7 @@ simpl!(*);
fork-interchange[0, 1](loop1); fork-interchange[0, 1](loop1);
reduce-slf(*); reduce-slf(*);
simpl!(*); simpl!(*);
slf!(*); slf(*);
simpl!(*); simpl!(*);
fork-split(*); fork-split(*);
......
...@@ -48,22 +48,6 @@ pub fn srad_harness(args: SRADInputs) { ...@@ -48,22 +48,6 @@ pub fn srad_harness(args: SRADInputs) {
let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols);
let mut image_h = HerculesMutBox::from(image.clone()); let mut image_h = HerculesMutBox::from(image.clone());
let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>();
let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>();
let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>();
let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>();
// Fix boundary conditions
iN[0] = 0;
iS[nrows - 1] = (nrows - 1) as i32;
jW[0] = 0;
jE[ncols - 1] = (ncols - 1) as i32;
let iN_h = HerculesImmBox::from(iN.as_slice());
let iS_h = HerculesImmBox::from(iS.as_slice());
let jW_h = HerculesImmBox::from(jW.as_slice());
let jE_h = HerculesImmBox::from(jE.as_slice());
let mut runner = runner!(srad); let mut runner = runner!(srad);
let result: Vec<f32> = HerculesMutBox::from( let result: Vec<f32> = HerculesMutBox::from(
runner runner
...@@ -72,10 +56,6 @@ pub fn srad_harness(args: SRADInputs) { ...@@ -72,10 +56,6 @@ pub fn srad_harness(args: SRADInputs) {
ncols as u64, ncols as u64,
niter as u64, niter as u64,
image_h.to(), image_h.to(),
iN_h.to(),
iS_h.to(),
jW_h.to(),
jE_h.to(),
max, max,
lambda, lambda,
) )
...@@ -90,18 +70,7 @@ pub fn srad_harness(args: SRADInputs) { ...@@ -90,18 +70,7 @@ pub fn srad_harness(args: SRADInputs) {
if verify { if verify {
let mut rust_result = image; let mut rust_result = image;
rust_srad::srad( rust_srad::srad(nrows, ncols, niter, &mut rust_result, max, lambda);
nrows,
ncols,
niter,
&mut rust_result,
&iN,
&iS,
&jW,
&jE,
max,
lambda,
);
if let Some(output) = output_verify { if let Some(output) = output_verify {
write_graphics(output, &rust_result, nrows, ncols, max); write_graphics(output, &rust_result, nrows, ncols, max);
......
pub fn srad( pub fn srad(nrows: usize, ncols: usize, niter: usize, image: &mut Vec<f32>, max: f32, lambda: f32) {
nrows: usize,
ncols: usize,
niter: usize,
image: &mut Vec<f32>,
iN: &[i32],
iS: &[i32],
jW: &[i32],
jE: &[i32],
max: f32,
lambda: f32,
) {
let nelems = nrows * ncols; let nelems = nrows * ncols;
// EXTRACT // EXTRACT
...@@ -44,11 +33,15 @@ pub fn srad( ...@@ -44,11 +33,15 @@ pub fn srad(
for i in 0..nrows { for i in 0..nrows {
let k = i + nrows * j; let k = i + nrows * j;
let Jc = image[k]; let Jc = image[k];
let iN = std::cmp::max(i, 1) - 1;
let iS = std::cmp::min(i, nrows - 2) + 1;
let jW = std::cmp::max(j, 1) - 1;
let jE = std::cmp::min(j, ncols - 2) + 1;
dN[k] = image[iN[i] as usize + nrows * j] - Jc; dN[k] = image[iN as usize + nrows * j] - Jc;
dS[k] = image[iS[i] as usize + nrows * j] - Jc; dS[k] = image[iS as usize + nrows * j] - Jc;
dW[k] = image[i + nrows * jW[j] as usize] - Jc; dW[k] = image[i + nrows * jW as usize] - Jc;
dE[k] = image[i + nrows * jE[j] as usize] - Jc; dE[k] = image[i + nrows * jE as usize] - Jc;
let G2 = let G2 =
(dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / (Jc * Jc); (dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / (Jc * Jc);
...@@ -72,11 +65,13 @@ pub fn srad( ...@@ -72,11 +65,13 @@ pub fn srad(
for j in 0..ncols { for j in 0..ncols {
for i in 0..nrows { for i in 0..nrows {
let k = i + nrows * j; let k = i + nrows * j;
let iS = std::cmp::min(i, nrows - 2) + 1;
let jE = std::cmp::min(j, ncols - 2) + 1;
let cN = c[k]; let cN = c[k];
let cS = c[iS[i] as usize + nrows * j]; let cS = c[iS as usize + nrows * j];
let cW = c[k]; let cW = c[k];
let cE = c[i + nrows * jE[j] as usize]; let cE = c[i + nrows * jE as usize];
let D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; let D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k];
......
...@@ -21,10 +21,6 @@ fn compress<nrows, ncols: usize>(inout image: f32[ncols, nrows], max: f32) { ...@@ -21,10 +21,6 @@ fn compress<nrows, ncols: usize>(inout image: f32[ncols, nrows], max: f32) {
fn srad<nrows, ncols: usize>( fn srad<nrows, ncols: usize>(
niter: usize, niter: usize,
inout image: f32[ncols, nrows], inout image: f32[ncols, nrows],
iN: i32[nrows],
iS: i32[nrows],
jW: i32[ncols],
jE: i32[ncols],
max: f32, max: f32,
lambda: f32, lambda: f32,
) { ) {
...@@ -60,10 +56,15 @@ fn srad<nrows, ncols: usize>( ...@@ -60,10 +56,15 @@ fn srad<nrows, ncols: usize>(
@loop2 for j in 0..ncols { @loop2 for j in 0..ncols {
for i in 0..nrows { for i in 0..nrows {
let Jc = image[j, i]; let Jc = image[j, i];
dN[j, i] = image[j, iN[i] as u64] - Jc; let iN = max!(i, 1) - 1;
dS[j, i] = image[j, iS[i] as u64] - Jc; let iS = min!(i, nrows - 2) + 1;
dW[j, i] = image[jW[j] as u64, i] - Jc; let jW = max!(j, 1) - 1;
dE[j, i] = image[jE[j] as u64, i] - Jc; let jE = min!(j, ncols - 2) + 1;
dN[j, i] = image[j, iN as u64] - Jc;
dS[j, i] = image[j, iS as u64] - Jc;
dW[j, i] = image[jW as u64, i] - Jc;
dE[j, i] = image[jE as u64, i] - Jc;
let G2 = (dN[j, i] * dN[j, i] + dS[j, i] * dS[j, i] let G2 = (dN[j, i] * dN[j, i] + dS[j, i] * dS[j, i]
+ dW[j, i] * dW[j, i] + dE[j, i] * dE[j, i]) / (Jc * Jc); + dW[j, i] * dW[j, i] + dE[j, i] * dE[j, i]) / (Jc * Jc);
...@@ -85,10 +86,13 @@ fn srad<nrows, ncols: usize>( ...@@ -85,10 +86,13 @@ fn srad<nrows, ncols: usize>(
@loop3 for j in 0..ncols { @loop3 for j in 0..ncols {
for i in 0..nrows { for i in 0..nrows {
let iS = min!(i, nrows - 2) + 1;
let jE = min!(j, ncols - 2) + 1;
let cN = c[j, i]; let cN = c[j, i];
let cS = c[j, iS[i] as u64]; let cS = c[j, iS as u64];
let cW = c[j, i]; let cW = c[j, i];
let cE = c[jE[j] as u64, i]; let cE = c[jE as u64, i];
let D = cN * dN[j, i] + cS * dS[j, i] + cW * dW[j, i] + cE * dE[j, i]; let D = cN * dN[j, i] + cS * dS[j, i] + cW * dW[j, i] + cE * dE[j, i];
image[j, i] = image[j, i] + 0.25 * lambda * D; image[j, i] = image[j, i] + 0.25 * lambda * D;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment