#![feature(concat_idents)] mod edge_detection_rust; use hercules_rt::{runner, HerculesCPURef}; #[cfg(feature = "cuda")] use hercules_rt::CUDABox; use std::slice::from_raw_parts; use clap::Parser; use opencv::core::{Mat, Size, CV_32F, CV_8U}; use opencv::highgui::{imshow, wait_key}; use opencv::imgproc::{cvt_color_def, ColorConversionCodes}; use opencv::prelude::{MatTraitConst, VideoCaptureTrait, VideoCaptureTraitConst}; use opencv::videoio::{VideoCapture, VideoCaptureProperties, VideoWriter, VideoWriterTrait}; juno_build::juno!("edge_detection"); #[derive(Parser)] #[clap(author, version, about, long_about = None)] struct EdgeDetectionInputs { input: String, #[clap(short, long)] display: bool, #[clap(short, long, value_name = "PATH")] output: Option<String>, #[clap(short, long)] verify: bool, #[clap(long = "display-verify")] display_verify: bool, #[clap(long = "output-verify", value_name = "PATH")] output_verify: Option<String>, #[clap(short, long, value_name = "COUNT")] frames: Option<usize>, } fn load_frame(video: &mut VideoCapture) -> Mat { let mut frame = Mat::default(); let Ok(true) = video.read(&mut frame) else { panic!("Failed to load frame"); }; let result = if frame.channels() == 3 { let mut converted = Mat::default(); let () = cvt_color_def( &frame, &mut converted, ColorConversionCodes::COLOR_BGR2GRAY.into(), ) .expect("Failure in conversion to grayscale"); let mut result = Mat::default(); let () = converted .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) .expect("Failure in conversion to f32"); result } else if frame.channels() == 1 { let mut result = Mat::default(); let () = frame .convert_to(&mut result, CV_32F, 1.0 / 255.0, 0.0) .expect("Failure in conversion to f32"); result } else { panic!("Expected either RGB or grayscale image"); }; assert!(result.is_continuous()); result } fn frame_from_slice(frame: &[f32], height: usize, width: usize) -> Mat { let result = Mat::from_slice(frame) .expect("Failed to create matrix from result") .reshape(1, height as i32) .expect("Failed to reshape result matrix") .clone_pointee(); assert!(result.cols() == width as i32); // Convert to u8 since the VideoWriter seems to require that let mut converted = Mat::default(); let () = result .convert_to(&mut converted, CV_8U, 255.0, 0.0) .expect("Failure in conversion to u8"); converted } fn edge_detection_harness(args: EdgeDetectionInputs) { let EdgeDetectionInputs { input, display, output, verify, display_verify, output_verify, frames, } = args; let gs: usize = 7; let gaussian_filter: Vec<f32> = vec![ 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, 0.000363, 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.002291, 0.023226, 0.092651, 0.146768, 0.092651, 0.023226, 0.002291, 0.001446, 0.014662, 0.058488, 0.092651, 0.058488, 0.014662, 0.001446, 0.000363, 0.003676, 0.014662, 0.023226, 0.014662, 0.003676, 0.000363, 0.000036, 0.000363, 0.001446, 0.002291, 0.001446, 0.000363, 0.000036, ]; #[cfg(not(feature = "cuda"))] let gaussian_filter_h = HerculesCPURef::from_slice(&gaussian_filter); #[cfg(feature = "cuda")] let gaussian_filter_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&gaussian_filter)); #[cfg(feature = "cuda")] let gaussian_filter_h = gaussian_filter_cuda.get_ref(); let sz: usize = 3; let structure: Vec<f32> = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; #[cfg(not(feature = "cuda"))] let structure_h = HerculesCPURef::from_slice(&structure); #[cfg(feature = "cuda")] let structure_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&structure)); #[cfg(feature = "cuda")] let structure_h = structure_cuda.get_ref(); let sb: usize = 3; let sx: Vec<f32> = vec![-1.0, 0.0, 1.0, -2.0, 0.0, 2.0, -1.0, 0.0, 1.0]; #[cfg(not(feature = "cuda"))] let sx_h = HerculesCPURef::from_slice(&sx); #[cfg(feature = "cuda")] let sx_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&sx)); #[cfg(feature = "cuda")] let sx_h = sx_cuda.get_ref(); let sy: Vec<f32> = vec![-1.0, -2.0, -1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0]; #[cfg(not(feature = "cuda"))] let sy_h = HerculesCPURef::from_slice(&sy); #[cfg(feature = "cuda")] let sy_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&sy)); #[cfg(feature = "cuda")] let sy_h = sy_cuda.get_ref(); let theta: f32 = 0.1; let mut video = VideoCapture::from_file_def(&input).expect("Error loading video"); assert!(video.is_opened().unwrap()); let fps = video .get(VideoCaptureProperties::CAP_PROP_FPS.into()) .expect("Error getting fps"); let num_frames = video .get(VideoCaptureProperties::CAP_PROP_FRAME_COUNT.into()) .expect("Error getting number of frames") as usize; let width = video .get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH.into()) .expect("Error getting width") as usize; let height = video .get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT.into()) .expect("Error getting height") as usize; let num_frames = if let Some(frames) = frames { usize::min(frames, num_frames) } else { num_frames }; let mut r = runner!(edge_detection); let mut output = output.map(|filename| { VideoWriter::new( &filename, VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), fps, Size { width: width as i32, height: height as i32, }, false, ) .expect("Error opening output video") }); let mut output_verify = output_verify.map(|filename| { VideoWriter::new( &filename, VideoWriter::fourcc('m', 'p', '4', 'v').unwrap(), fps, Size { width: width as i32, height: height as i32, }, false, ) .expect("Error opening output video") }); for i in 0..num_frames { let frame = load_frame(&mut video); let ptr = frame.ptr_def().unwrap() as *const f32; assert!(frame.rows() as usize == height); assert!(frame.cols() as usize == width); let input = unsafe { from_raw_parts(ptr, height * width) }; #[cfg(not(feature = "cuda"))] let input_h = HerculesCPURef::from_slice(input); #[cfg(feature = "cuda")] let input_cuda = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(input)); #[cfg(feature = "cuda")] let input_h = input_cuda.get_ref(); let result = async_std::task::block_on(async { r.run( height as u64, width as u64, gs as u64, sz as u64, sb as u64, input_h, gaussian_filter_h.clone(), structure_h.clone(), sx_h.clone(), sy_h.clone(), theta, ) .await }); #[cfg(not(feature = "cuda"))] let result : Box<[f32]> = result.as_slice::<f32>().to_vec().into_boxed_slice(); #[cfg(feature = "cuda")] let result : Box<[f32]> = { let num_out = unsafe { result.__size() / std::mem::size_of::<f32>() }; let mut res_cpu: Box<[f32]> = vec![0.0; num_out].into_boxed_slice(); result.to_cpu_ref(&mut res_cpu); res_cpu }; if display { let result = frame_from_slice(&result, height, width); let () = imshow("Juno", &result).expect("Failure in displaying image"); } if let Some(ref mut output) = output { let result = frame_from_slice(&result, height, width); let () = output.write(&result).expect("Failure in writing frame"); } if verify { let rust_result = edge_detection_rust::edge_detection( height, width, gs, sz, sb, input, &gaussian_filter, &structure, &sx, &sy, theta, ); assert_eq!(result.as_ref(), <Vec<f32> as AsRef<[f32]>>::as_ref(&rust_result)); println!("Frames {} match", i); if display_verify { let rust_result = frame_from_slice(&rust_result, height, width); let () = imshow("Rust", &rust_result).expect("Failure in displaying image"); } if let Some(ref mut output) = output_verify { let result = frame_from_slice(&rust_result, height, width); let () = output.write(&result).expect("Failure in writing frame"); } } if display || (verify && display_verify) { let _ = wait_key(0); } } if let Some(mut output) = output { let () = output.release().expect("Failure releasing output video"); } if let Some(mut output) = output_verify { let () = output.release().expect("Failure releasing output video"); } } fn main() { let args = EdgeDetectionInputs::parse(); edge_detection_harness(args); } #[test] fn edge_detection_test() { edge_detection_harness(EdgeDetectionInputs { input: "examples/formula1_scaled.mp4".to_string(), display: false, output: None, verify: true, display_verify: false, output_verify: None, // Limit frames to keep runtime low frames: Some(2), }); }