diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index 5edddd86df2901c29109148e2b107c0b923dbd0e..bd1520745c72f9300ba750cf48e2c86c4bee978e 100644 --- a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -560,23 +560,33 @@ impl<'a> RTContext<'a> { // same interface as AsyncRust functions. let block = &mut blocks.get_mut(&bb).unwrap().data; let is_async = func.schedules[id.idx()].contains(&Schedule::AsyncCall); + if is_async { + for arg in args { + if let Some(arc) = self.clone_arc(*arg, false) { + write!(block, "{}", arc)?; + } + } + } let device = self.devices[callee_id.idx()]; let prefix = match (device, is_async) { - (Device::AsyncRust, false) => "", - (_, false) => "", - (Device::AsyncRust, true) => "Some(::async_std::task::spawn(", - (_, true) => "Some(::async_std::task::spawn(async move {", + (Device::AsyncRust, false) | (_, false) => { + format!("{} = ", self.get_value(id, bb, true)) + } + (_, true) => format!( + "{}::async_std::task::spawn(async move {{ async_call_sender_{}.send(", + self.clone_arc(id, true).unwrap(), + id.idx() + ), }; let postfix = match (device, is_async) { (Device::AsyncRust, false) => ".await", (_, false) => "", - (Device::AsyncRust, true) => "))", - (_, true) => "}))", + (Device::AsyncRust, true) => ".await).await})", + (_, true) => ").await})", }; write!( block, - "{} = {}{}(", - self.get_value(id, bb, true), + "{}{}(", prefix, self.module.functions[callee_id.idx()].name )?; @@ -1069,11 +1079,15 @@ impl<'a> RTContext<'a> { } // If the node is a call with an AsyncCall schedule, it should be - // spawned as a task and awaited later. + // lowered to a channel. let is_async_call = func.nodes[idx].is_call() && func.schedules[idx].contains(&Schedule::AsyncCall); if is_async_call { - write!(w, "let mut async_call_{} = None;", idx)?; + write!( + w, + "let mut async_call_channel_{} = ::async_std::channel::bounded(1);let async_call_sender_{} = ::std::sync::Arc::new(async_call_channel_{}.0);let async_call_receiver_{} = ::std::sync::Arc::new(async_call_channel_{}.1);", + idx, idx, idx, idx, idx + )?; } else { write!( w, @@ -1356,16 +1370,30 @@ impl<'a> RTContext<'a> { } else if func.nodes[id.idx()].is_call() && func.schedules[id.idx()].contains(&Schedule::AsyncCall) { - format!( - "async_call_{}{}", - id.idx(), - if lhs { "" } else { ".unwrap().await" } - ) + assert!(!lhs); + format!("async_call_receiver_{}.recv().await.unwrap()", id.idx(),) } else { format!("node_{}", id.idx()) } } + fn clone_arc(&self, id: NodeID, lhs: bool) -> Option<String> { + let func = self.get_func(); + if func.nodes[id.idx()].is_call() && func.schedules[id.idx()].contains(&Schedule::AsyncCall) + { + let kind = if lhs { "sender" } else { "receiver" }; + Some(format!( + "let async_call_{}_{} = async_call_{}_{}.clone();", + kind, + id.idx(), + kind, + id.idx() + )) + } else { + None + } + } + fn get_type(&self, id: TypeID) -> &'static str { convert_type(&self.module.types[id.idx()]) } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 3ae1c6bf8971fb91eae339c5ced475bd9ffd2265..3ac2f326115bb4aafb80c6a2d4b3cd024096db8c 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -127,17 +127,8 @@ simpl!(fuse5); delete-uncalled(*); simpl!(*); - -fork-split(fuse1); -unforkify(fuse1); -fork-split(fuse2); -unforkify(fuse2); -fork-split(fuse3); -unforkify(fuse3); -fork-split(fuse4); -unforkify(fuse4); -fork-split(fuse5); -unforkify(fuse5); +fork-split(fuse1, fuse2, fuse3, fuse4, fuse5); +unforkify(fuse1, fuse2, fuse3, fuse4, fuse5); simpl!(*); diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml index fc5fb451f277897cd3fa5004f953380ee2ed2f46..483724d8e4c2c7bcd057990ce5e149923e90cc3b 100644 --- a/juno_samples/edge_detection/Cargo.toml +++ b/juno_samples/edge_detection/Cargo.toml @@ -15,7 +15,6 @@ required-features = ["opencv"] [lib] path = "src/lib.rs" -required-features = ["opencv"] [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/juno_samples/edge_detection/build.rs b/juno_samples/edge_detection/build.rs index 7071fae7612ab871757fddebdbb576a2e4a6073c..d5d6f7b7de417f48b658c849881139347869ca05 100644 --- a/juno_samples/edge_detection/build.rs +++ b/juno_samples/edge_detection/build.rs @@ -14,6 +14,8 @@ fn main() { JunoCompiler::new() .file_in_src("edge_detection.jn") .unwrap() + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..3c3d09b34f9bd8c4b412d4b19e3898769cf2670a --- /dev/null +++ b/juno_samples/edge_detection/src/cpu.sch @@ -0,0 +1,79 @@ +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + dce(X); + infer-schedules(X); +} + +simpl!(*); + +ip-sroa(*); +sroa(*); +simpl!(*); + +no-memset(gaussian_smoothing@res); +fixpoint { + forkify(gaussian_smoothing); + fork-guard-elim(gaussian_smoothing); + fork-coalesce(gaussian_smoothing); +} +predication(gaussian_smoothing); +simpl!(gaussian_smoothing); +predication(gaussian_smoothing); +simpl!(gaussian_smoothing); + +no-memset(laplacian_estimate@res, laplacian_estimate@shr1, laplacian_estimate@shr2); +fixpoint { + forkify(laplacian_estimate); + fork-guard-elim(laplacian_estimate); + fork-coalesce(laplacian_estimate); +} +simpl!(laplacian_estimate); + +no-memset(zero_crossings@res, zero_crossings@shr1, zero_crossings@shr2); +fixpoint { + forkify(zero_crossings); + fork-guard-elim(zero_crossings); + fork-coalesce(zero_crossings); +} +simpl!(zero_crossings); + +no-memset(gradient@res); +fixpoint { + forkify(gradient); + fork-guard-elim(gradient); + fork-coalesce(gradient); +} +predication(gradient); +simpl!(gradient); +predication(gradient); +simpl!(gradient); + +fixpoint { + forkify(max_gradient); + fork-guard-elim(max_gradient); + fork-coalesce(max_gradient); +} +simpl!(max_gradient); + +no-memset(reject_zero_crossings@res); +fixpoint { + forkify(reject_zero_crossings); + fork-guard-elim(reject_zero_crossings); + fork-coalesce(reject_zero_crossings); +} +predication(reject_zero_crossings); +simpl!(reject_zero_crossings); + +async-call(edge_detection@le, edge_detection@zc); + +fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); +unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); + +simpl!(*); + +delete-uncalled(*); +gcm(*); diff --git a/juno_samples/edge_detection/src/edge_detection.jn b/juno_samples/edge_detection/src/edge_detection.jn index d49258c5d328d021953aaf035f80fb328a4440af..3bc5bbfbe16b6eb15d0c6387d9f0d8397ce69cc6 100644 --- a/juno_samples/edge_detection/src/edge_detection.jn +++ b/juno_samples/edge_detection/src/edge_detection.jn @@ -2,7 +2,7 @@ fn gaussian_smoothing<n, m, gs : usize>( input: f32[n, m], filter: f32[gs, gs], ) -> f32[n, m] { - let result : f32[n, m]; + @res let result : f32[n, m]; // Define the gaussian radius as half the gaussian size const gr = gs / 2; @@ -39,12 +39,12 @@ fn laplacian_estimate<n, m, sz: usize>( ) -> f32[n, m] { const r = sz / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { // Copy data for dilation filter - let imageArea : f32[sz, sz]; + @shr1 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR @@ -64,7 +64,7 @@ fn laplacian_estimate<n, m, sz: usize>( } // Data copy for erotion filter - let imageArea : f32[sz, sz]; + @shr2 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR @@ -97,12 +97,12 @@ fn zero_crossings<n, m, sz: usize>( ) -> f32[n, m] { const r = sz / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { // Data copy for dilation filter - let imageArea : f32[sz, sz]; + @shr1 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MIN_BR @@ -124,7 +124,7 @@ fn zero_crossings<n, m, sz: usize>( } // Data copy for erotion filter - let imageArea : f32[sz, sz]; + @shr2 let imageArea : f32[sz, sz]; for i = 0 to sz { for j = 0 to sz { imageArea[i, j] = if row + i < r then MAX_BR @@ -160,7 +160,7 @@ fn gradient<n, m, sb: usize>( ) -> f32[n, m] { const sbr = sb / 2; - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { @@ -206,7 +206,7 @@ fn reject_zero_crossings<n, m: usize>( max_gradient: f32, theta: f32, ) -> f32[n, m] { - let result : f32[n, m]; + @res let result : f32[n, m]; for row = 0 to n { for col = 0 to m { @@ -229,10 +229,10 @@ fn edge_detection<n, m, gs, sz, sb: usize>( sy: f32[sb, sb], theta: f32, ) -> f32[n, m] { - let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); - let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); - let zcs = zero_crossings::<n, m, sz>(laplacian, structure); - let gradient = gradient::<n, m, sb>(smoothed, sx, sy); - let maxgrad = max_gradient::<n, m>(gradient); + let smoothed = gaussian_smoothing::<n, m, gs>(input, gaussian_filter); + @le let laplacian = laplacian_estimate::<n, m, sz>(smoothed, structure); + @zc let zcs = zero_crossings::<n, m, sz>(laplacian, structure); + let gradient = gradient::<n, m, sb>(smoothed, sx, sy); + let maxgrad = max_gradient::<n, m>(gradient); return reject_zero_crossings::<n, m>(zcs, gradient, maxgrad, theta); } diff --git a/juno_samples/edge_detection/src/lib.rs b/juno_samples/edge_detection/src/lib.rs index 37268b561f2c0104a89f097b88abde5d3a34692a..6c2a15bd394a8fed3828ea79f2f8470856ead846 100644 --- a/juno_samples/edge_detection/src/lib.rs +++ b/juno_samples/edge_detection/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg(feature = "opencv")] #![feature(concat_idents)] mod edge_detection_rust;