diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index 17f0f8939f4961e36da41b7c975bf867376b5d60..73dcf52811b355e8e4c1434e1f9bd6f3fb5baaff 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -1383,15 +1383,15 @@ extern \"C\" {} {}(", let cg_tile = self.get_cg_tile(nesting_fork.unwrap(), CGType::Use); #[allow(unreachable_patterns)] let cg_op = match intrinsic { - Intrinsic::Max => "max", - Intrinsic::Min => "min", + Intrinsic::Max => "greater", + Intrinsic::Min => "less", _ => unreachable!(), }; let id_type_name = self.get_type(id_type, false); write!( w, "{}{} = cg::reduce({}, {}, cg::{}<{}>());\n", - tabs, define_variable, non_reduce_arg, cg_tile, cg_op, id_type_name + tabs, define_variable, cg_tile, non_reduce_arg, cg_op, id_type_name )?; } else { let ty = &self.types[id_type.idx()]; diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch index 3da40fd3b6aae345a8b294b9f1e4e8405c2c3f68..ad3ec65c902aefd90384f1e40098bc43ad5edeb7 100644 --- a/juno_samples/edge_detection/src/gpu.sch +++ b/juno_samples/edge_detection/src/gpu.sch @@ -8,6 +8,8 @@ macro simpl!(X) { infer-schedules(X); } +gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); + simpl!(*); ip-sroa(*); @@ -58,6 +60,12 @@ fixpoint { fork-coalesce(max_gradient); } simpl!(max_gradient); +fork-dim-merge(max_gradient); +simpl!(max_gradient); +fork-tile[32, 0, false, true](max_gradient); +simpl!(max_gradient); +fork-split(max_gradient); +simpl!(max_gradient); no-memset(reject_zero_crossings@res); fixpoint { @@ -70,8 +78,6 @@ simpl!(reject_zero_crossings); async-call(edge_detection@le, edge_detection@zc); -gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings); - simpl!(*); delete-uncalled(*); diff --git a/juno_samples/edge_detection/src/lib.rs b/juno_samples/edge_detection/src/lib.rs index 6c2a15bd394a8fed3828ea79f2f8470856ead846..dab84cf6206c3cb9b816c88c53c8ddfbec102994 100644 --- a/juno_samples/edge_detection/src/lib.rs +++ b/juno_samples/edge_detection/src/lib.rs @@ -143,6 +143,11 @@ pub fn edge_detection_harness(args: EdgeDetectionInputs) { num_frames }; + println!( + "Running edge with {} rows, {} columns, {} gs, {} sz, and {} sb.", + height, width, gs, sz, sb, + ); + let mut r = runner!(edge_detection); let mut output = output.map(|filename| {