From 5fb96a38840c87d68d131832ac18cd68cc297078 Mon Sep 17 00:00:00 2001 From: Aaron Councilman <aaronjc4@illinois.edu> Date: Sat, 1 Mar 2025 14:06:37 -0600 Subject: [PATCH] Fix gpu schedule --- juno_samples/rodinia/cfd/src/gpu_euler.sch | 47 ++++++++++++------- .../rodinia/cfd/src/gpu_pre_euler.sch | 45 +++++++++++------- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch index 7f7ee42c..aed6115e 100644 --- a/juno_samples/rodinia/cfd/src/gpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch @@ -1,23 +1,34 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(euler); -gpu(auto.euler); - -inline(auto.euler); -inline(auto.euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); delete-uncalled(*); +gpu(copy_vars, compute_step_factor, compute_flux, time_step); -sroa[false](auto.euler); -dce(*); -float-collections(*); -dce(*); +simpl!(*); +ip-sroa[true](*); +sroa[true](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); +no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); +parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop); +unforkify(*); +float-collections(*); gcm(*); - diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch index 33c46dab..d91f1b00 100644 --- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch @@ -1,23 +1,32 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(pre_euler); -gpu(auto.pre_euler); - -inline(auto.pre_euler); -inline(auto.pre_euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step); delete-uncalled(*); +gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step); -sroa[false](auto.pre_euler); -dce(*); -float-collections(*); -dce(*); +simpl!(*); +ip-sroa[true](*); +sroa[true](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); +unforkify(*); +float-collections(*); gcm(*); - -- GitLab