diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b80dd590ab42b4019daa285a83ec89d978884006..4a3ca13d145f35e54c8ae8c30b00b2be1d69d2b9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,5 @@ test-job: stage: test script: - - cargo test - - cargo test --features=cuda + - cargo test --features=opencv + - cargo test --features=cuda,opencv diff --git a/Cargo.lock b/Cargo.lock index 49630436a0f0b90d8252824046c29f0e18b78af2..b8bf227809ac5a475407f4fb53cabfbe5d11f38a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -422,6 +422,27 @@ dependencies = [ "vob", ] +[[package]] +name = "clang" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c044c781163c001b913cd018fc95a628c50d0d2dfea8bca77dad71edb16e37" +dependencies = [ + "clang-sys", + "libc", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.27" @@ -576,6 +597,12 @@ dependencies = [ "with_builtin_macros", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "either" version = "1.13.0" @@ -777,6 +804,12 @@ dependencies = [ "weezl", ] +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + [[package]] name = "gloo-timers" version = "0.3.0" @@ -1085,6 +1118,18 @@ dependencies = [ "with_builtin_macros", ] +[[package]] +name = "juno_edge_detection" +version = "0.1.0" +dependencies = [ + "async-std", + "clap", + "hercules_rt", + "juno_build", + "opencv", + "with_builtin_macros", +] + [[package]] name = "juno_frontend" version = "0.1.0" @@ -1225,6 +1270,16 @@ dependencies = [ "cc", ] +[[package]] +name = "libloading" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + [[package]] name = "libredox" version = "0.1.3" @@ -1473,6 +1528,41 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "opencv" +version = "0.94.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f80fd7d018d20b1e49bdd65e72350f1f63cad6bc9c15f850c47c31a6ad8d0d20" +dependencies = [ + "cc", + "dunce", + "jobserver", + "libc", + "num-traits", + "once_cell", + "opencv-binding-generator", + "pkg-config", + "semver", + "shlex", + "vcpkg", + "windows", +] + +[[package]] +name = "opencv-binding-generator" +version = "0.95.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7283829fe440be381fea73521f850b287fd44f994acd6453e1e19b3d479ef7fc" +dependencies = [ + "clang", + "clang-sys", + "dunce", + "once_cell", + "percent-encoding", + "regex", + "shlex", +] + [[package]] name = "ordered-float" version = "4.6.0" @@ -1506,6 +1596,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + [[package]] name = "phf" version = "0.11.3" @@ -2242,6 +2338,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ef4c4aa54d5d05a279399bfa921ec387b7aba77caf7a682ae8d86785b8fdad2" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vergen" version = "8.3.2" @@ -2369,13 +2471,76 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "windows" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1" +dependencies = [ + "windows-core", + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-core" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-implement" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", +] + +[[package]] +name = "windows-interface" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb26fd936d991781ea39e87c3a27285081e3c0da5ca0fcbc02d368cc6f52ff01" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", +] + +[[package]] +name = "windows-result" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d08106ce80268c4067c0571ca55a9b4e9516518eaa1a1fe9b37ca403ae1d1a34" +dependencies = [ + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-strings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b888f919960b42ea4e11c2f408fadb55f78a9f236d5eef084103c8ce52893491" +dependencies = [ + "windows-targets 0.53.0", +] + [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -2384,14 +2549,30 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", ] [[package]] @@ -2400,48 +2581,96 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index ced011a96c96793891228876314debeabcb561ed..f4976106dfab468bc77e9d9aa2c933b4c6913490 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,8 @@ members = [ "juno_samples/nested_ccp", "juno_samples/antideps", "juno_samples/implicit_clone", - "juno_samples/cava", + "juno_samples/cava", "juno_samples/concat", - "juno_samples/schedule_test", + "juno_samples/schedule_test", + "juno_samples/edge_detection", ] diff --git a/hercules_rt/build.rs b/hercules_rt/build.rs index 8f82e86cab73b28f44222a8a3d8eaa4c49f04218..2a1538d600e1632e9311ef9464c38a83fdf44415 100644 --- a/hercules_rt/build.rs +++ b/hercules_rt/build.rs @@ -1,3 +1,5 @@ +#![feature(exit_status_error)] + use std::env::var; use std::path::Path; use std::process::Command; @@ -9,12 +11,16 @@ fn main() { .args(&["src/rtdefs.cu", "-c", "-o"]) .arg(&format!("{}/rtdefs.o", out_dir)) .status() - .expect("PANIC: NVCC failed when building runtime. Is NVCC installed?"); + .expect("PANIC: NVCC failed when building runtime. Is NVCC installed?") + .exit_ok() + .expect("NVCC did not succeed"); Command::new("ar") .current_dir(&Path::new(&out_dir)) .args(&["crus", "librtdefs.a", "rtdefs.o"]) .status() - .unwrap(); + .unwrap() + .exit_ok() + .expect("ar did not succeed"); println!("cargo::rustc-link-search=native={}", out_dir); println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/"); diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..a3825eed9b6601ca4d06d8e81d332ed5986b3124 --- /dev/null +++ b/juno_samples/edge_detection/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "juno_edge_detection" +version = "0.1.0" +authors = ["Aaron Councilman <aaronjc4@illinois.edu>"] +edition = "2021" + +[features] +opencv = ["dep:opencv"] +cuda = ["juno_build/cuda", "hercules_rt/cuda"] + +[[bin]] +name = "juno_edge_detection" +path = "src/main.rs" +required-features = ["opencv"] + +[build-dependencies] +juno_build = { path = "../../juno_build" } + +[dependencies] +juno_build = { path = "../../juno_build" } +hercules_rt = { path = "../../hercules_rt" } +async-std = "*" +clap = { version = "*", features = ["derive"] } +with_builtin_macros = "0.1.0" +opencv = { version = "*", features = ["clang-runtime"], optional = true } diff --git a/juno_samples/edge_detection/build.rs b/juno_samples/edge_detection/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..7071fae7612ab871757fddebdbb576a2e4a6073c --- /dev/null +++ b/juno_samples/edge_detection/build.rs @@ -0,0 +1,19 @@ +extern crate juno_build; +use juno_build::JunoCompiler; + +fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("edge_detection.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] + JunoCompiler::new() + .file_in_src("edge_detection.jn") + .unwrap() + .build() + .unwrap(); +} diff --git a/juno_samples/edge_detection/examples/formula1_scaled.mp4 b/juno_samples/edge_detection/examples/formula1_scaled.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1e0e3979fe0f1d2ca299313a1f404beeee6f9f3f Binary files /dev/null and b/juno_samples/edge_detection/examples/formula1_scaled.mp4 differ diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn new file mode 100644 index 0000000000000000000000000000000000000000..d49258c5d328d021953aaf035f80fb328a4440af --- /dev/null +++ b/juno_samples/edge_detection/src/edge_detection.jn @@ -0,0 +1,238 @@ +fn gaussian_smoothing<n, m, gs : usize>( + input: f32[n, m], + filter: f32[gs, gs], +) -> f32[n, m] { + let result : f32[n, m]; + + // Define the gaussian radius as half the gaussian size + const gr = gs / 2; + + for row = 0 to n { + for col = 0 to m { + let smoothed = 0.0; + + for i = 0 to gs { + for j = 0 to gs { + let val = input[if row + i < gr then 0 + else if row + i - gr > n - 1 then n - 1 + else row + i - gr, + if col + j < gr then 0 + else if col + j - gr > m - 1 then m - 1 + else col + j - gr]; + smoothed += val * filter[i, j]; + } + } + + result[row, col] = smoothed; + } + } + + return result; +} + +const MIN_BR : f32 = 0; +const MAX_BR : f32 = 1; + +fn laplacian_estimate<n, m, sz: usize>( + input: f32[n, m], + structure: f32[sz, sz], +) -> f32[n, m] { + const r = sz / 2; + + let result : f32[n, m]; + + for row = 0 to n { + for col = 0 to m { + // Copy data for dilation filter + let imageArea : f32[sz, sz]; + for i = 0 to sz { + for j = 0 to sz { + imageArea[i, j] = if row + i < r then MIN_BR + else if row + i - r > n - 1 then MIN_BR + else if col + j < r then MIN_BR + else if col + j - r > m - 1 then MIN_BR + else input[row + i - r, col + j - r]; + } + } + + // Compute pixel of dilated image + let dilated = MIN_BR; + for i = 0 to sz { + for j = 0 to sz { + dilated = max!(dilated, imageArea[i, j] * structure[i, j]); + } + } + + // Data copy for erotion filter + let imageArea : f32[sz, sz]; + for i = 0 to sz { + for j = 0 to sz { + imageArea[i, j] = if row + i < r then MAX_BR + else if row + i - r > n - 1 then MAX_BR + else if col + j < r then MAX_BR + else if col + j - r > m - 1 then MAX_BR + else input[row + i - r, col + j - r]; + } + } + + // Compute pixel of eroded image + let eroded = MAX_BR; + for i = 0 to sz { + for j = 0 to sz { + eroded = min!(eroded, imageArea[i, j] * structure[i, j]); + } + } + + let laplacian = dilated + eroded - 2 * input[row, col]; + result[row, col] = laplacian; + } + } + + return result; +} + +fn zero_crossings<n, m, sz: usize>( + input: f32[n, m], + structure: f32[sz, sz], +) -> f32[n, m] { + const r = sz / 2; + + let result : f32[n, m]; + + for row = 0 to n { + for col = 0 to m { + // Data copy for dilation filter + let imageArea : f32[sz, sz]; + for i = 0 to sz { + for j = 0 to sz { + imageArea[i, j] = if row + i < r then MIN_BR + else if row + i - r > n - 1 then MIN_BR + else if col + j < r then MIN_BR + else if col + j - r > m - 1 then MIN_BR + else if input[row + i - r, col + j - r] > MIN_BR + then MAX_BR + else MIN_BR; + } + } + + // Compute the pixel of dilated image + let dilated = MIN_BR; + for i = 0 to sz { + for j = 0 to sz { + dilated = max!(dilated, imageArea[i, j] * structure[i, j]); + } + } + + // Data copy for erotion filter + let imageArea : f32[sz, sz]; + for i = 0 to sz { + for j = 0 to sz { + imageArea[i, j] = if row + i < r then MAX_BR + else if row + i - r > n - 1 then MAX_BR + else if col + j < r then MAX_BR + else if col + j - r > m - 1 then MAX_BR + else if input[row + i - r, col + j - r] > MIN_BR + then MAX_BR + else MIN_BR; + } + } + + // Compute the pixel of eroded image + let eroded = MAX_BR; + for i = 0 to sz { + for j = 0 to sz { + eroded = min!(eroded, imageArea[i, j] * structure[i, j]); + } + } + + let sign = dilated - eroded; + result[row, col] = sign; + } + } + + return result; +} + +fn gradient<n, m, sb: usize>( + input: f32[n, m], + sx: f32[sb, sb], + sy: f32[sb, sb], +) -> f32[n, m] { + const sbr = sb / 2; + + let result : f32[n, m]; + + for row = 0 to n { + for col = 0 to m { + + let gx = 0; + let gy = 0; + + for i = 0 to sb { + for j = 0 to sb { + let val = input[if row + i < sbr then 0 + else if row + i - sbr > n - 1 then n - 1 + else row + i - sbr, + if col + j < sbr then 0 + else if col + j - sbr > m - 1 then m - 1 + else col + j - sbr]; + gx += val * sx[i, j]; + gy += val * sy[i, j]; + } + } + + result[row, col] = sqrt!(gx * gx + gy * gy); + } + } + + return result; +} + +fn max_gradient<n, m: usize>(gradient: f32[n, m]) -> f32 { + let max = gradient[0, 0]; + + for i = 0 to n { + for j = 0 to m { + max = max!(max, gradient[i, j]); + } + } + + return max; +} + +fn reject_zero_crossings<n, m: usize>( + crossings: f32[n, m], + gradient: f32[n, m], + max_gradient: f32, + theta: f32, +) -> f32[n, m] { + let result : f32[n, m]; + + for row = 0 to n { + for col = 0 to m { + result[row, col] = + if crossings[row, col] > 0 && gradient[row, col] > theta * max_gradient + then 1.0 + else 0.0; + } + } + + return result; +} + +#[entry] +fn edge_detection<n, m, gs, sz, sb: usize>( + input: f32[n, m], + gaussian_filter: f32[gs, gs], + structure: f32[sz, sz], + sx: f32[sb, sb], + sy: f32[sb, sb], + theta: f32, +) -> f32[n, m] { + let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); + let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); + let zcs = zero_crossings::<n, m, sz>(laplacian, structure); + let gradient = gradient::<n, m, sb>(smoothed, sx, sy); + let maxgrad = max_gradient::<n, m>(gradient); + return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta); +} diff --git a/juno_samples/edge_detection/src/edge_detection_rust.rs b/juno_samples/edge_detection/src/edge_detection_rust.rs new file mode 100644 index 0000000000000000000000000000000000000000..cbe0a7b30afc0338b3e24f6d576a36e65ccaeee4 --- /dev/null +++ b/juno_samples/edge_detection/src/edge_detection_rust.rs @@ -0,0 +1,461 @@ +fn gaussian_smoothing( + rows: usize, + cols: usize, + gaussian_size: usize, + input: &[f32], + filter: &[f32], +) -> Vec<f32> { + let mut result = vec![0.0; rows * cols]; + let gaussian_radius: i64 = (gaussian_size / 2) as i64; + + let rows = rows as i64; + let cols = cols as i64; + + for gy in 0..rows { + for gx in 0..cols { + let gloc = gx + gy * cols; + + let mut smoothed_val = 0.0; + + for i in -gaussian_radius..=gaussian_radius { + for j in -gaussian_radius..=gaussian_radius { + let mut load_offset = gloc + i * cols + j; + + if gy + i < 0 { + // top contour + load_offset = gx + j; + } else if gy + i > rows - 1 { + // bottom contour + load_offset = (rows - 1) * cols + gx + j; + } + + // Adjust so we are within image horizontally + if gx + j < 0 { + // left contour + load_offset -= gx + j; + } else if gx + j > cols - 1 { + // right contour + load_offset = load_offset - gx - j + cols - 1; + } + + let gval = input[load_offset as usize]; + let factor_offset = + (gaussian_radius + i) * (gaussian_size as i64) + gaussian_radius + j; + smoothed_val += gval * filter[factor_offset as usize]; + } + } + + result[gloc as usize] = smoothed_val; + } + } + + result +} + +const MIN_BR: f32 = 0.0; +const MAX_BR: f32 = 1.0; + +fn laplacian_estimate( + rows: usize, + cols: usize, + szb: usize, + input: &[f32], + structure: &[f32], +) -> Vec<f32> { + assert!(szb == 3); + + let mut result = vec![0.0; rows * cols]; + + for gy in 0..rows { + for gx in 0..cols { + let mut image_area = vec![0.0; szb * szb]; + + // Copy for dilation filter + image_area[1 * szb + 1] = input[gy * cols + gx]; + + if gx == 0 { + image_area[0 * szb + 0] = MIN_BR; + image_area[1 * szb + 0] = MIN_BR; + image_area[2 * szb + 0] = MIN_BR; + } else { + image_area[1 * szb + 0] = input[gy * cols + gx - 1]; + image_area[0 * szb + 0] = if gy > 0 { + input[(gy - 1) * cols + gx - 1] + } else { + MIN_BR + }; + image_area[2 * szb + 0] = if gy < rows - 1 { + input[(gy + 1) * cols + gx - 1] + } else { + MIN_BR + }; + } + + if gx == cols - 1 { + image_area[0 * szb + 2] = MIN_BR; + image_area[1 * szb + 2] = MIN_BR; + image_area[2 * szb + 2] = MIN_BR; + } else { + image_area[1 * szb + 2] = input[gy * cols + gx + 1]; + image_area[0 * szb + 2] = if gy > 0 { + input[(gy - 1) * cols + gx + 1] + } else { + MIN_BR + }; + image_area[2 * szb + 2] = if gy < rows - 1 { + input[(gy + 1) * cols + gx + 1] + } else { + MIN_BR + }; + } + + image_area[0 * szb + 1] = if gy > 0 { + input[(gy - 1) * cols + gx] + } else { + MIN_BR + }; + image_area[2 * szb + 1] = if gy < rows - 1 { + input[(gy + 1) * cols + gx] + } else { + MIN_BR + }; + + // Compute pixel of dilated image + let mut dilated_pixel = MIN_BR; + for i in 0..szb { + for j in 0..szb { + dilated_pixel = f32::max( + dilated_pixel, + image_area[i * szb + j] * structure[i * szb + j], + ); + } + } + + // Data copy for erosion filter - only change the boundary conditions + if gx == 0 { + image_area[0 * szb + 0] = MAX_BR; + image_area[1 * szb + 0] = MAX_BR; + image_area[2 * szb + 0] = MAX_BR; + } else { + if gy == 0 { + image_area[0 * szb + 0] = MAX_BR; + } + if gy == rows - 1 { + image_area[2 * szb + 0] = MAX_BR; + } + } + + if gx == cols - 1 { + image_area[0 * szb + 2] = MAX_BR; + image_area[1 * szb + 2] = MAX_BR; + image_area[2 * szb + 2] = MAX_BR; + } else { + if gy == 0 { + image_area[0 * szb + 2] = MAX_BR; + } + if gy == rows - 1 { + image_area[2 * szb + 2] = MAX_BR; + } + } + + if gy == 0 { + image_area[0 * szb + 1] = MAX_BR; + } + if gy == rows - 1 { + image_area[2 * szb + 1] = MAX_BR; + } + + // Compute pixel of eroded image + let mut eroded_pixel = MAX_BR; + for i in 0..szb { + for j in 0..szb { + eroded_pixel = f32::min( + eroded_pixel, + image_area[i * szb + j] * structure[i * szb + j], + ); + } + } + + let laplacian = dilated_pixel + eroded_pixel - 2.0 * image_area[1 * szb + 1]; + result[gy * cols + gx] = laplacian; + } + } + + result +} + +fn compute_zero_crossings( + rows: usize, + cols: usize, + szb: usize, + input: &[f32], + structure: &[f32], +) -> Vec<f32> { + assert!(szb == 3); + + let mut result = vec![0.0; rows * cols]; + + for gy in 0..rows { + for gx in 0..cols { + let mut image_area = vec![vec![0.0, 0.0, 0.0]; szb]; + + // Copy for dilation filter + image_area[1][1] = if input[gy * cols + gx] > MIN_BR { + MAX_BR + } else { + MIN_BR + }; + + if gx == 0 { + image_area[0][0] = MIN_BR; + image_area[1][0] = MIN_BR; + image_area[2][0] = MIN_BR; + } else { + image_area[1][0] = if input[gy * cols + gx - 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + }; + image_area[0][0] = if gy > 0 { + if input[(gy - 1) * cols + gx - 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + image_area[2][0] = if gy < rows - 1 { + if input[(gy + 1) * cols + gx - 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + } + + if gx == cols - 1 { + image_area[0][2] = MIN_BR; + image_area[1][2] = MIN_BR; + image_area[2][2] = MIN_BR; + } else { + image_area[1][2] = if input[gy * cols + gx + 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + }; + image_area[0][2] = if gy > 0 { + if input[(gy - 1) * cols + gx + 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + image_area[2][2] = if gy < rows - 1 { + if input[(gy + 1) * cols + gx + 1] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + } + + image_area[0][1] = if gy > 0 { + if input[(gy - 1) * cols + gx] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + image_area[2][1] = if gy < rows - 1 { + if input[(gy + 1) * cols + gx] > MIN_BR { + MAX_BR + } else { + MIN_BR + } + } else { + MIN_BR + }; + + // Compute pixel of dilated image + let mut dilated_pixel = MIN_BR; + for i in 0..szb { + for j in 0..szb { + dilated_pixel = + f32::max(dilated_pixel, image_area[i][j] * structure[i * szb + j]); + } + } + + // Data copy for erosion filter - only change the boundary conditions + if gx == 0 { + image_area[0][0] = MAX_BR; + image_area[1][0] = MAX_BR; + image_area[2][0] = MAX_BR; + } else { + if gy == 0 { + image_area[0][0] = MAX_BR; + } + if gy == rows - 1 { + image_area[2][0] = MAX_BR; + } + } + + if gx == cols - 1 { + image_area[0][2] = MAX_BR; + image_area[1][2] = MAX_BR; + image_area[2][2] = MAX_BR; + } else { + if gy == 0 { + image_area[0][2] = MAX_BR; + } + if gy == rows - 1 { + image_area[2][2] = MAX_BR; + } + } + + if gy == 0 { + image_area[0][1] = MAX_BR; + } + if gy == rows - 1 { + image_area[2][1] = MAX_BR; + } + + // Compute pixel of eroded image + let mut eroded_pixel = MAX_BR; + for i in 0..szb { + for j in 0..szb { + eroded_pixel = + f32::min(eroded_pixel, image_area[i][j] * structure[i * szb + j]); + } + } + + let pixel_sign = dilated_pixel - eroded_pixel; + result[gy * cols + gx] = pixel_sign; + } + } + + result +} + +fn compute_gradient( + rows: usize, + cols: usize, + sobel_size: usize, + input: &[f32], + sx: &[f32], + sy: &[f32], +) -> Vec<f32> { + assert!(sobel_size == 3); + + let mut result = vec![0.0; rows * cols]; + let sobel_radius = (sobel_size / 2) as i64; + let rows = rows as i64; + let cols = cols as i64; + + for gy in 0..rows { + for gx in 0..cols { + let gloc = gx + gy * cols; + + let mut gradient_x = 0.0; + let mut gradient_y = 0.0; + + for i in -sobel_radius..=sobel_radius { + for j in -sobel_radius..=sobel_radius { + let mut load_offset = gloc + i * cols + j; + + if gy + i < 0 { + // top contour + load_offset = gx + j; + } else if gy + i > rows - 1 { + // bottom contour + load_offset = (rows - 1) * cols + gx + j; + } + + // Adjust so we are within image horizontally + if gx + j < 0 { + // left contour + load_offset -= gx + j; + } else if gx + j > cols - 1 { + // right contour + load_offset = load_offset - gx - j + cols - 1; + } + + let gval = input[load_offset as usize]; + let s_offset = (sobel_radius + i) * (sobel_size as i64) + sobel_radius + j; + gradient_x += gval * sx[s_offset as usize]; + gradient_y += gval * sy[s_offset as usize]; + } + } + + result[gloc as usize] = f32::sqrt(gradient_x * gradient_x + gradient_y * gradient_y); + } + } + + result +} + +fn compute_max_gradient(rows: usize, cols: usize, gradient: &[f32]) -> f32 { + let mut max = gradient[0]; + + for i in 1..rows * cols { + if max < gradient[i] { + max = gradient[i]; + } + } + + max +} + +fn reject_zero_crossings( + rows: usize, + cols: usize, + zero_crossings: &[f32], + gradient: &[f32], + max_gradient: f32, + theta: f32, +) -> Vec<f32> { + let mut result = vec![0.0; rows * cols]; + + for gy in 0..rows { + for gx in 0..cols { + result[gy * cols + gx] = if zero_crossings[gy * cols + gx] > 0.0 + && gradient[gy * cols + gx] > theta * max_gradient + { + 1.0 + } else { + 0.0 + }; + } + } + + result +} + +pub fn edge_detection( + rows: usize, + cols: usize, + gaussian_size: usize, + szb: usize, + soebel_size: usize, + input: &[f32], + gaussian_filter: &[f32], + structure: &[f32], + sx: &[f32], + sy: &[f32], + theta: f32, +) -> Vec<f32> { + let smoothed = gaussian_smoothing(rows, cols, gaussian_size, input, gaussian_filter); + let laplacian = laplacian_estimate(rows, cols, szb, &smoothed, structure); + let zcs = compute_zero_crossings(rows, cols, szb, &laplacian, structure); + let gradient = compute_gradient(rows, cols, soebel_size, &smoothed, sx, sy); + let max_gradient = compute_max_gradient(rows, cols, &gradient); + reject_zero_crossings(rows, cols, &zcs, &gradient, max_gradient, theta) +} diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..d7330c679edc6a890efe5c0134369ca411ac7d67 --- /dev/null +++ b/juno_samples/edge_detection/src/gpu.sch @@ -0,0 +1,19 @@ +gvn(*); +phi-elim(*); +dce(*); + +inline(*); +let out = auto-outline(*); +gpu(out.edge_detection); + +ip-sroa(*); +sroa(*); +dce(*); +gvn(*); +phi-elim(*); +dce(*); + +//forkify(*); +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/edge_detection/src/main.rs b/juno_samples/edge_detection/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..eda65016c60de26a5cd1fe21d8d95dcaff826cf9 --- /dev/null +++ b/juno_samples/edge_detection/src/main.rs @@ -0,0 +1,307 @@ +#![feature(concat_idents)] + +mod edge_detection_rust; + +use hercules_rt::{runner, HerculesCPURef}; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; + +use std::slice::from_raw_parts; + +use clap::Parser; + +use opencv::core::{Mat, Size, CV_32F, CV_8U}; +use opencv::highgui::{imshow, wait_key}; +use opencv::imgproc::{cvt_color_def, ColorConversionCodes}; +use opencv::prelude::{MatTraitConst, VideoCaptureTrait, VideoCaptureTraitConst}; +use opencv::videoio::{VideoCapture, VideoCaptureProperties, VideoWriter, VideoWriterTrait}; + +juno_build::juno!("edge_detection"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +struct EdgeDetectionInputs { + input: String, + #[clap(short, long)] + display: bool, + #[clap(short, long, value_name = "PATH")] + output: Option<String>, + #[clap(short, long)] + verify: bool, + #[clap(long = "display-verify")] + display_verify: bool, + #[clap(long = "output-verify", value_name = "PATH")] + output_verify: Option<String>, + #[clap(short, long, value_name = "COUNT")] + frames: Option<usize>, +} + +fn load_frame(video: &mut VideoCapture) -> Mat { + let mut frame = Mat::default(); + + let Ok(true) = video.read(&mut frame) else { + panic!("Failed to load frame"); + }; + let result = if frame.channels() == 3 { + let mut converted = Mat::default(); + let () = cvt_color_def( + &frame, + &mut converted, + ColorConversionCodes::COLOR_BGR2GRAY.into(), + ) + .expect("Failure in conversion to grayscale"); + let mut result = Mat::default(); + let () = converted + .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) + .expect("Failure in conversion to f32"); + result + } else if frame.channels() == 1 { + let mut result = Mat::default(); + let () = frame + .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) + .expect("Failure in conversion to f32"); + result + } else { + panic!("Expected either RGB or grayscale image"); + }; + + assert!(result.is_continuous()); + result +} + +fn frame_from_slice(frame: &[f32], height: usize, width: usize) -> Mat { + let result = Mat::from_slice(frame) + .expect("Failed to create matrix from result") + .reshape(1, height as i32) + .expect("Failed to reshape result matrix") + .clone_pointee(); + assert!(result.cols() == width as i32); + + // Convert to u8 since the VideoWriter seems to require that + let mut converted = Mat::default(); + let () = result + .convert_to(&mut converted, CV_8U, 255.0, 0.0) + .expect("Failure in conversion to u8"); + + converted +} + +fn edge_detection_harness(args: EdgeDetectionInputs) { + let EdgeDetectionInputs { + input, + display, + output, + verify, + display_verify, + output_verify, + frames, + } = args; + + let gs: usize = 7; + let gaussian_filter: Vec<f32> = vec![ + 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, 0.000363, 0.003676, + 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.001446, 0.014662, 0.058488, 0.092651, + 0.058488, 0.014662, 0.001446, 0.002291, 0.023226, 0.092651, 0.146768, 0.092651, 0.023226, + 0.002291, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.000363, + 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.000036, 0.000363, 0.001446, + 0.002291, 0.001446, 0.000363, 0.000036, + ]; + #[cfg(not(feature = "cuda"))] + let gaussian_filter_h = HerculesCPURef::from_slice(&gaussian_filter); + #[cfg(feature = "cuda")] + let gaussian_filter_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&gaussian_filter)); + #[cfg(feature = "cuda")] + let gaussian_filter_h = gaussian_filter_cuda.get_ref(); + + let sz: usize = 3; + let structure: Vec<f32> = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; + #[cfg(not(feature = "cuda"))] + let structure_h = HerculesCPURef::from_slice(&structure); + #[cfg(feature = "cuda")] + let structure_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&structure)); + #[cfg(feature = "cuda")] + let structure_h = structure_cuda.get_ref(); + + let sb: usize = 3; + let sx: Vec<f32> = vec![-1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0]; + #[cfg(not(feature = "cuda"))] + let sx_h = HerculesCPURef::from_slice(&sx); + #[cfg(feature = "cuda")] + let sx_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&sx)); + #[cfg(feature = "cuda")] + let sx_h = sx_cuda.get_ref(); + + let sy: Vec<f32> = vec![-1.0, -2.0, -1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]; + #[cfg(not(feature = "cuda"))] + let sy_h = HerculesCPURef::from_slice(&sy); + #[cfg(feature = "cuda")] + let sy_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&sy)); + #[cfg(feature = "cuda")] + let sy_h = sy_cuda.get_ref(); + + let theta: f32 = 0.1; + + let mut video = VideoCapture::from_file_def(&input).expect("Error loading video"); + assert!(video.is_opened().unwrap()); + + let fps = video + .get(VideoCaptureProperties::CAP_PROP_FPS.into()) + .expect("Error getting fps"); + + let num_frames = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_COUNT.into()) + .expect("Error getting number of frames") as usize; + let width = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH.into()) + .expect("Error getting width") as usize; + let height = video + .get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT.into()) + .expect("Error getting height") as usize; + + let num_frames = if let Some(frames) = frames { + usize::min(frames, num_frames) + } else { + num_frames + }; + + let mut r = runner!(edge_detection); + + let mut output = output.map(|filename| { + VideoWriter::new( + &filename, + VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), + fps, + Size { + width: width as i32, + height: height as i32, + }, + false, + ) + .expect("Error opening output video") + }); + + let mut output_verify = output_verify.map(|filename| { + VideoWriter::new( + &filename, + VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), + fps, + Size { + width: width as i32, + height: height as i32, + }, + false, + ) + .expect("Error opening output video") + }); + + for i in 0..num_frames { + let frame = load_frame(&mut video); + let ptr = frame.ptr_def().unwrap() as *const f32; + + assert!(frame.rows() as usize == height); + assert!(frame.cols() as usize == width); + + let input = unsafe { from_raw_parts(ptr, height * width) }; + + #[cfg(not(feature = "cuda"))] + let input_h = HerculesCPURef::from_slice(input); + #[cfg(feature = "cuda")] + let input_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(input)); + #[cfg(feature = "cuda")] + let input_h = input_cuda.get_ref(); + + let result = async_std::task::block_on(async { + r.run( + height as u64, + width as u64, + gs as u64, + sz as u64, + sb as u64, + input_h, + gaussian_filter_h.clone(), + structure_h.clone(), + sx_h.clone(), + sy_h.clone(), + theta, + ) + .await + }); + + #[cfg(not(feature = "cuda"))] + let result : Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); + #[cfg(feature = "cuda")] + let result : Box<[f32]> = { + let num_out = unsafe { result.__size() / std::mem::size_of::<f32>() }; + let mut res_cpu: Box<[f32]> = vec![0.0; num_out].into_boxed_slice(); + result.to_cpu_ref(&mut res_cpu); + res_cpu + }; + + if display { + let result = frame_from_slice(&result, height, width); + let () = imshow("Juno", &result).expect("Failure in displaying image"); + } + if let Some(ref mut output) = output { + let result = frame_from_slice(&result, height, width); + let () = output.write(&result).expect("Failure in writing frame"); + } + + if verify { + let rust_result = edge_detection_rust::edge_detection( + height, + width, + gs, + sz, + sb, + input, + &gaussian_filter, + &structure, + &sx, + &sy, + theta, + ); + + assert_eq!(result.as_ref(), <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result)); + println!("Frames {} match", i); + + if display_verify { + let rust_result = frame_from_slice(&rust_result, height, width); + let () = imshow("Rust", &rust_result).expect("Failure in displaying image"); + } + if let Some(ref mut output) = output_verify { + let result = frame_from_slice(&rust_result, height, width); + let () = output.write(&result).expect("Failure in writing frame"); + } + } + + if display || (verify && display_verify) { + let _ = wait_key(0); + } + } + + if let Some(mut output) = output { + let () = output.release().expect("Failure releasing output video"); + } + if let Some(mut output) = output_verify { + let () = output.release().expect("Failure releasing output video"); + } +} + +fn main() { + let args = EdgeDetectionInputs::parse(); + edge_detection_harness(args); +} + +#[test] +fn edge_detection_test() { + edge_detection_harness(EdgeDetectionInputs { + input: "examples/formula1_scaled.mp4".to_string(), + display: false, + output: None, + verify: true, + display_verify: false, + output_verify: None, + // Limit frames to keep runtime low + frames: Some(2), + }); +}