diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index b2567b8f1313e97aeb70268b11847b6d70b1a2f5..1ecebf117548b0ceeecc2ca2883a28129a8a5184 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -822,7 +822,7 @@ fn typeflow( // We also return the return type from here match intrinsic { // Intrinsics that take any numeric type and return the same - Intrinsic::Abs => { + Intrinsic::Abs | Intrinsic::Max | Intrinsic::Min => { if let Concrete(id) = inputs[0] { if types[id.idx()].is_arithmetic() { Concrete(*id) @@ -856,8 +856,6 @@ fn typeflow( | Intrinsic::Ln1P | Intrinsic::Log10 | Intrinsic::Log2 - | Intrinsic::Max - | Intrinsic::Min | Intrinsic::Round | Intrinsic::Sin | Intrinsic::Sinh diff --git a/juno_samples/rodinia/srad/benches/srad_bench.rs b/juno_samples/rodinia/srad/benches/srad_bench.rs index 728702d9bcc18405ef291945f81413f49f5715af..6af13aae5d9093bd59cf6299dfa64ac3e73209a6 100644 --- a/juno_samples/rodinia/srad/benches/srad_bench.rs +++ b/juno_samples/rodinia/srad/benches/srad_bench.rs @@ -24,19 +24,6 @@ fn srad_bench(c: &mut Criterion) { } = read_graphics(image); let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); let mut image_h = HerculesMutBox::from(image.clone()); - let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); - let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); - let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); - let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); - // Fix boundary conditions - iN[0] = 0; - iS[nrows - 1] = (nrows - 1) as i32; - jW[0] = 0; - jE[ncols - 1] = (ncols - 1) as i32; - let iN_h = HerculesImmBox::from(iN.as_slice()); - let iS_h = HerculesImmBox::from(iS.as_slice()); - let jW_h = HerculesImmBox::from(jW.as_slice()); - let jE_h = HerculesImmBox::from(jE.as_slice()); group.bench_function("srad bench", |b| { b.iter(|| { async_std::task::block_on(async { @@ -45,10 +32,6 @@ fn srad_bench(c: 
&mut Criterion) { ncols as u64, niter as u64, image_h.to(), - iN_h.to(), - iS_h.to(), - jW_h.to(), - jE_h.to(), max, lambda, ) diff --git a/juno_samples/rodinia/srad/src/cpu.sch b/juno_samples/rodinia/srad/src/cpu.sch index b3188b60a96319d429a42b5157a6b61b5861cf15..5a8c180e23a4af9f013c65b0cdccaaca2b2b48b6 100644 --- a/juno_samples/rodinia/srad/src/cpu.sch +++ b/juno_samples/rodinia/srad/src/cpu.sch @@ -32,7 +32,7 @@ simpl!(*); fork-interchange[0, 1](loop1); reduce-slf(*); simpl!(*); -slf!(*); +slf(*); simpl!(*); fork-split(*); diff --git a/juno_samples/rodinia/srad/src/lib.rs b/juno_samples/rodinia/srad/src/lib.rs index a647b94a5ffc8aad3bab91badc1bd58a305e7e75..cb156d9db712155ad9e5b3f9775e3c6c29dbf22a 100644 --- a/juno_samples/rodinia/srad/src/lib.rs +++ b/juno_samples/rodinia/srad/src/lib.rs @@ -48,22 +48,6 @@ pub fn srad_harness(args: SRADInputs) { let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); let mut image_h = HerculesMutBox::from(image.clone()); - let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); - let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); - let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); - let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); - - // Fix boundary conditions - iN[0] = 0; - iS[nrows - 1] = (nrows - 1) as i32; - jW[0] = 0; - jE[ncols - 1] = (ncols - 1) as i32; - - let iN_h = HerculesImmBox::from(iN.as_slice()); - let iS_h = HerculesImmBox::from(iS.as_slice()); - let jW_h = HerculesImmBox::from(jW.as_slice()); - let jE_h = HerculesImmBox::from(jE.as_slice()); - let mut runner = runner!(srad); let result: Vec<f32> = HerculesMutBox::from( runner @@ -72,10 +56,6 @@ pub fn srad_harness(args: SRADInputs) { ncols as u64, niter as u64, image_h.to(), - iN_h.to(), - iS_h.to(), - jW_h.to(), - jE_h.to(), max, lambda, ) @@ -90,18 +70,7 @@ pub fn srad_harness(args: SRADInputs) { if verify { let mut rust_result = image; - rust_srad::srad( - nrows, - ncols, - 
niter, - &mut rust_result, - &iN, - &iS, - &jW, - &jE, - max, - lambda, - ); + rust_srad::srad(nrows, ncols, niter, &mut rust_result, max, lambda); if let Some(output) = output_verify { write_graphics(output, &rust_result, nrows, ncols, max); diff --git a/juno_samples/rodinia/srad/src/rust_srad.rs b/juno_samples/rodinia/srad/src/rust_srad.rs index 3226e35faf2410f94ab887019d54abafe0219454..f25d382aff3c4f42c4478d911532a5850b509a26 100644 --- a/juno_samples/rodinia/srad/src/rust_srad.rs +++ b/juno_samples/rodinia/srad/src/rust_srad.rs @@ -1,15 +1,4 @@ -pub fn srad( - nrows: usize, - ncols: usize, - niter: usize, - image: &mut Vec<f32>, - iN: &[i32], - iS: &[i32], - jW: &[i32], - jE: &[i32], - max: f32, - lambda: f32, -) { +pub fn srad(nrows: usize, ncols: usize, niter: usize, image: &mut Vec<f32>, max: f32, lambda: f32) { let nelems = nrows * ncols; // EXTRACT @@ -44,11 +33,15 @@ pub fn srad( for i in 0..nrows { let k = i + nrows * j; let Jc = image[k]; + let iN = std::cmp::max(i, 1) - 1; + let iS = std::cmp::min(i + 1, nrows - 1); + let jW = std::cmp::max(j, 1) - 1; + let jE = std::cmp::min(j + 1, ncols - 1); - dN[k] = image[iN[i] as usize + nrows * j] - Jc; - dS[k] = image[iS[i] as usize + nrows * j] - Jc; - dW[k] = image[i + nrows * jW[j] as usize] - Jc; - dE[k] = image[i + nrows * jE[j] as usize] - Jc; + dN[k] = image[iN + nrows * j] - Jc; + dS[k] = image[iS + nrows * j] - Jc; + dW[k] = image[i + nrows * jW] - Jc; + dE[k] = image[i + nrows * jE] - Jc; let G2 = (dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / (Jc * Jc); @@ -72,11 +65,13 @@ pub fn srad( for j in 0..ncols { for i in 0..nrows { let k = i + nrows * j; + let iS = std::cmp::min(i + 1, nrows - 1); + let jE = std::cmp::min(j + 1, ncols - 1); let cN = c[k]; - let cS = c[iS[i] as usize + nrows * j]; + let cS = c[iS + nrows * j]; let cW = c[k]; - let cE = c[i + nrows * jE[j] as usize]; + let cE = c[i + nrows * jE]; let D = cN * dN[k] + cS 
* dS[k] + cW * dW[k] + cE * dE[k]; diff --git a/juno_samples/rodinia/srad/src/srad.jn b/juno_samples/rodinia/srad/src/srad.jn index 176778bec2c4b223c6041ee6a277cc17b6810d72..b055b296b678eb38791ef7353e528bff5e79b4c2 100644 --- a/juno_samples/rodinia/srad/src/srad.jn +++ b/juno_samples/rodinia/srad/src/srad.jn @@ -21,10 +21,6 @@ fn compress<nrows, ncols: usize>(inout image: f32[ncols, nrows], max: f32) { fn srad<nrows, ncols: usize>( niter: usize, inout image: f32[ncols, nrows], - iN: i32[nrows], - iS: i32[nrows], - jW: i32[ncols], - jE: i32[ncols], max: f32, lambda: f32, ) { @@ -60,10 +56,15 @@ fn srad<nrows, ncols: usize>( @loop2 for j in 0..ncols { for i in 0..nrows { let Jc = image[j, i]; - dN[j, i] = image[j, iN[i] as u64] - Jc; - dS[j, i] = image[j, iS[i] as u64] - Jc; - dW[j, i] = image[jW[j] as u64, i] - Jc; - dE[j, i] = image[jE[j] as u64, i] - Jc; + let iN = max!(i, 1) - 1; + let iS = min!(i + 1, nrows - 1); + let jW = max!(j, 1) - 1; + let jE = min!(j + 1, ncols - 1); + + dN[j, i] = image[j, iN as u64] - Jc; + dS[j, i] = image[j, iS as u64] - Jc; + dW[j, i] = image[jW as u64, i] - Jc; + dE[j, i] = image[jE as u64, i] - Jc; let G2 = (dN[j, i] * dN[j, i] + dS[j, i] * dS[j, i] + dW[j, i] * dW[j, i] + dE[j, i] * dE[j, i]) / (Jc * Jc); @@ -85,10 +86,13 @@ fn srad<nrows, ncols: usize>( @loop3 for j in 0..ncols { for i in 0..nrows { + let iS = min!(i + 1, nrows - 1); + let jE = min!(j + 1, ncols - 1); + let cN = c[j, i]; - let cS = c[j, iS[i] as u64]; + let cS = c[j, iS as u64]; let cW = c[j, i]; - let cE = c[jE[j] as u64, i]; + let cE = c[jE as u64, i]; let D = cN * dN[j, i] + cS * dS[j, i] + cW * dW[j, i] + cE * dE[j, i]; image[j, i] = image[j, i] + 0.25 * lambda * D;