From 5fb96a38840c87d68d131832ac18cd68cc297078 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Sat, 1 Mar 2025 14:06:37 -0600
Subject: [PATCH] Fix gpu schedule

---
 juno_samples/rodinia/cfd/src/gpu_euler.sch    | 47 ++++++++++++-------
 .../rodinia/cfd/src/gpu_pre_euler.sch         | 45 +++++++++++-------
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch
index 7f7ee42c..aed6115e 100644
--- a/juno_samples/rodinia/cfd/src/gpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch
@@ -1,23 +1,34 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(euler);
-gpu(auto.euler);
-
-inline(auto.euler);
-inline(auto.euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux, time_step);
 
-sroa[false](auto.euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);
+ip-sroa[true](*);
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
+no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
+parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop);
 
+unforkify(*);
+float-collections(*);
 gcm(*);
-
diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
index 33c46dab..d91f1b00 100644
--- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
@@ -1,23 +1,32 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(pre_euler);
-gpu(auto.pre_euler);
-
-inline(auto.pre_euler);
-inline(auto.pre_euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step);
 
-sroa[false](auto.pre_euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);
+ip-sroa[true](*);
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
 
+unforkify(*);
+float-collections(*);
 gcm(*);
-
-- 
GitLab