diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch
index 7f7ee42ca9af7e6026b304c7b27d00bbb9a3f035..aed6115e7cf790a9274862b1ab2e4d099a194a32 100644
--- a/juno_samples/rodinia/cfd/src/gpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch
@@ -1,23 +1,34 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {  // simpl!(X): runs the passes below on selection X, in this order; replaces the removed gvn/dce/phi-elim/crc/slf list
+  ccp(X);  // presumably constant propagation (gloss inferred from the name; confirm)
+  simplify-cfg(X);
+  lift-dc-math(X);  // NOTE(review): semantics not visible in this file; confirm
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);  // one dce at the end, where the removed list ran dce after every pass
+  infer-schedules(X);
+}
 
-let auto = auto-outline(euler);
-gpu(auto.euler);
-
-inline(auto.euler);
-inline(auto.euler);
+simpl!(*);  // run the simpl! bundle on every function before inlining
+inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);  // explicit function list replaces the removed auto-outline + inline(auto.euler) flow
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux, time_step);  // compute_flux_contribution is absent here; presumably it was inlined and then deleted as uncalled (confirm)
 
-sroa[false](auto.euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);  // re-simplify after the inline / delete-uncalled / gpu steps
+ip-sroa[true](*);  // NOTE(review): meaning of the [true] flag is not visible here (the removed sroa call passed false); confirm
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);  // clean up after the sroa / predication / const-inline passes
+fixpoint {  // presumably repeats the body until it stops changing anything; confirm
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
+no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);  // targets whatever is tagged @res in each of these three functions
+parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop);  // presumably `\` is set difference (outer_loop minus inner_loop); confirm
 
+unforkify(*);  // by name, the inverse of the earlier forkify step; confirm
+float-collections(*);  // now runs immediately before gcm; the removed version ran it earlier, followed by dce
 gcm(*);
-
diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
index 33c46dabe92854850f55fe0f0101cfe022947cf0..d91f1b001ca1fe4b7ff544c3f8cab561490acd00 100644
--- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
@@ -1,23 +1,32 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {  // simpl!(X): runs the passes below on selection X, in this order; replaces the removed gvn/dce/phi-elim/crc/slf list
+  ccp(X);  // presumably constant propagation (gloss inferred from the name; confirm)
+  simplify-cfg(X);
+  lift-dc-math(X);  // NOTE(review): semantics not visible in this file; confirm
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);  // one dce at the end, where the removed list ran dce after every pass
+  infer-schedules(X);
+}
 
-let auto = auto-outline(pre_euler);
-gpu(auto.pre_euler);
-
-inline(auto.pre_euler);
-inline(auto.pre_euler);
+simpl!(*);  // run the simpl! bundle on every function before inlining
+inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);  // explicit function list replaces the removed auto-outline + inline(auto.pre_euler) flow
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step);  // compute_flux_contribution (singular) is absent here; presumably it was inlined and then deleted as uncalled (confirm)
 
-sroa[false](auto.pre_euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);  // re-simplify after the inline / delete-uncalled / gpu steps
+ip-sroa[true](*);  // NOTE(review): meaning of the [true] flag is not visible here (the removed sroa call passed false); confirm
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);  // clean up after the sroa / predication / const-inline passes
+fixpoint {  // presumably repeats the body until it stops changing anything; confirm
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);  // last simplification before the unforkify / float-collections / gcm steps
 
+unforkify(*);  // by name, the inverse of the earlier forkify step; confirm
+float-collections(*);  // now runs immediately before gcm; the removed version ran it earlier, followed by dce
 gcm(*);
-