From d5f4aacedf2ee39ce4fe2e254cad5197df092877 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 2 Mar 2025 10:45:57 -0600
Subject: [PATCH] CFD opt: track SROA constant sub-edits, retune CFD schedules

---
 hercules_opt/src/sroa.rs                      | 11 ++++++++---
 juno_samples/rodinia/cfd/src/cpu_euler.sch    |  2 +-
 juno_samples/rodinia/cfd/src/gpu_euler.sch    | 18 ++++++++++++++----
 .../rodinia/cfd/src/gpu_pre_euler.sch         | 19 +++++++++++++++++--
 4 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/hercules_opt/src/sroa.rs b/hercules_opt/src/sroa.rs
index e658ff88..2718f99d 100644
--- a/hercules_opt/src/sroa.rs
+++ b/hercules_opt/src/sroa.rs
@@ -447,7 +447,7 @@ pub fn sroa(
                 field_map.insert(node, generate_reads(editor, types[&node], node));
             }
             Node::Constant { id } => {
-                field_map.insert(node, generate_constant_fields(editor, id));
+                field_map.insert(node, generate_constant_fields(editor, id, node));
                 to_delete.push(node);
             }
             _ => {
@@ -1079,7 +1079,11 @@ pub fn generate_constant(editor: &mut FunctionEditor, typ: TypeID) -> ConstantID
 
 // Given a constant cnst adds node to the function which are the constant values of each field and
 // returns a list of pairs of indices and the node that holds that index
-fn generate_constant_fields(editor: &mut FunctionEditor, cnst: ConstantID) -> IndexTree<NodeID> {
+fn generate_constant_fields(
+    editor: &mut FunctionEditor,
+    cnst: ConstantID,
+    old_node: NodeID,
+) -> IndexTree<NodeID> {
     let cs: Option<Vec<ConstantID>> =
         if let Some(cs) = editor.get_constant(cnst).try_product_fields() {
             Some(cs.into())
@@ -1090,13 +1094,14 @@ fn generate_constant_fields(editor: &mut FunctionEditor, cnst: ConstantID) -> In
     if let Some(cs) = cs {
         let mut fields = vec![];
         for c in cs {
-            fields.push(generate_constant_fields(editor, c));
+            fields.push(generate_constant_fields(editor, c, old_node));
         }
         IndexTree::Node(fields)
     } else {
         let mut node = None;
         editor.edit(|mut edit| {
             node = Some(edit.add_node(Node::Constant { id: cnst }));
+            edit.sub_edit(old_node, node.unwrap());
             Ok(edit)
         });
         IndexTree::Leaf(node.expect("Add node cannot fail"))
diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index 4e7ee3cf..7a284a9a 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -12,6 +12,7 @@ macro simpl!(X) {
 
 simpl!(*);
 inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
+no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
 delete-uncalled(*);
 simpl!(*);
 ip-sroa[true](*);
@@ -24,7 +25,6 @@ fixpoint {
   fork-guard-elim(*);
 }
 simpl!(*);
-no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
 unforkify(compute_flux@inner_loop);
 
 fork-tile[32, 0, false, false](compute_step_factor);
diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch
index aed6115e..3700f79d 100644
--- a/juno_samples/rodinia/cfd/src/gpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch
@@ -12,6 +12,7 @@ macro simpl!(X) {
 
 simpl!(*);
 inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
+no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
 delete-uncalled(*);
 gpu(copy_vars, compute_step_factor, compute_flux, time_step);
 
@@ -26,9 +27,18 @@ fixpoint {
   fork-guard-elim(*);
 }
 simpl!(*);
-no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
-parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop);
+unforkify(compute_flux@inner_loop);
+
+fork-tile[32, 0, false, true](compute_step_factor);
+fork-split(compute_step_factor);
+
+fork-tile[32, 0, false, true](compute_flux);
+fork-split(compute_flux);
+
+fork-tile[32, 0, false, true](time_step);
+fork-split(time_step);
+
+fork-tile[32, 0, false, true](copy_vars);
+fork-split(copy_vars);
 
-unforkify(*);
-float-collections(*);
 gcm(*);
diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
index d91f1b00..d6db675b 100644
--- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
@@ -12,6 +12,7 @@ macro simpl!(X) {
 
 simpl!(*);
 inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);
+no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
 delete-uncalled(*);
 gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step);
 
@@ -26,7 +27,21 @@ fixpoint {
   fork-guard-elim(*);
 }
 simpl!(*);
+unforkify(compute_flux@inner_loop);
+
+fork-tile[32, 0, false, true](compute_step_factor);
+fork-split(compute_step_factor);
+
+fork-tile[32, 0, false, true](compute_flux_contributions);
+fork-split(compute_flux_contributions);
+
+fork-tile[32, 0, false, true](compute_flux);
+fork-split(compute_flux);
+
+fork-tile[32, 0, false, true](time_step);
+fork-split(time_step);
+
+fork-tile[32, 0, false, true](copy_vars);
+fork-split(copy_vars);
 
-unforkify(*);
-float-collections(*);
 gcm(*);
-- 
GitLab