diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 17f0f8939f4961e36da41b7c975bf867376b5d60..73dcf52811b355e8e4c1434e1f9bd6f3fb5baaff 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -1383,15 +1383,15 @@ extern \"C\" {} {}(",
                     let cg_tile = self.get_cg_tile(nesting_fork.unwrap(), CGType::Use);
                     #[allow(unreachable_patterns)]
                     let cg_op = match intrinsic {
-                        Intrinsic::Max => "max",
-                        Intrinsic::Min => "min",
+                        Intrinsic::Max => "greater",
+                        Intrinsic::Min => "less",
                         _ => unreachable!(),
                     };
                     let id_type_name = self.get_type(id_type, false);
                     write!(
                         w,
                         "{}{} = cg::reduce({}, {}, cg::{}<{}>());\n",
-                        tabs, define_variable, non_reduce_arg, cg_tile, cg_op, id_type_name
+                        tabs, define_variable, cg_tile, non_reduce_arg, cg_op, id_type_name
                     )?;
                 } else {
                     let ty = &self.types[id_type.idx()];
diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch
index 3da40fd3b6aae345a8b294b9f1e4e8405c2c3f68..ad3ec65c902aefd90384f1e40098bc43ad5edeb7 100644
--- a/juno_samples/edge_detection/src/gpu.sch
+++ b/juno_samples/edge_detection/src/gpu.sch
@@ -8,6 +8,8 @@ macro simpl!(X) {
   infer-schedules(X);
 }
 
+gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
+
 simpl!(*);
 
 ip-sroa(*);
@@ -58,6 +60,12 @@ fixpoint {
   fork-coalesce(max_gradient);
 }
 simpl!(max_gradient);
+fork-dim-merge(max_gradient);
+simpl!(max_gradient);
+fork-tile[32, 0, false, true](max_gradient);
+simpl!(max_gradient);
+fork-split(max_gradient);
+simpl!(max_gradient);
 
 no-memset(reject_zero_crossings@res);
 fixpoint {
@@ -70,8 +78,6 @@ simpl!(reject_zero_crossings);
 
 async-call(edge_detection@le, edge_detection@zc);
 
-gpu(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, max_gradient, reject_zero_crossings);
-
 simpl!(*);
 
 delete-uncalled(*);
diff --git a/juno_samples/edge_detection/src/lib.rs b/juno_samples/edge_detection/src/lib.rs
index 6c2a15bd394a8fed3828ea79f2f8470856ead846..dab84cf6206c3cb9b816c88c53c8ddfbec102994 100644
--- a/juno_samples/edge_detection/src/lib.rs
+++ b/juno_samples/edge_detection/src/lib.rs
@@ -143,6 +143,11 @@ pub fn edge_detection_harness(args: EdgeDetectionInputs) {
         num_frames
     };
 
+    println!(
+        "Running edge with {} rows, {} columns, {} gs, {} sz, and {} sb.",
+        height, width, gs, sz, sb,
+    );
+
     let mut r = runner!(edge_detection);
 
     let mut output = output.map(|filename| {