From ff03fe68238c360455e1240766a77bd3ce494052 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 28 Feb 2025 18:06:10 -0600
Subject: [PATCH 1/6] ip-sroa params first attempt

---
 hercules_ir/src/typecheck.rs             |   8 +-
 hercules_opt/src/interprocedural_sroa.rs | 123 ++++++++++++++++++-----
 2 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs
index 2a3f9fb1..b2567b8f 100644
--- a/hercules_ir/src/typecheck.rs
+++ b/hercules_ir/src/typecheck.rs
@@ -716,8 +716,10 @@ fn typeflow(
             // Check number of run-time arguments.
             if inputs.len() - 1 != callee.param_types.len() {
                 return Error(format!(
-                    "Call node has {} inputs, but calls a function with {} parameters.",
+                    "Call node in {} has {} inputs, but calls a function ({}) with {} parameters.",
+                    function.name,
                     inputs.len() - 1,
+                    callee.name,
                     callee.param_types.len(),
                 ));
             }
@@ -725,8 +727,10 @@ fn typeflow(
             // Check number of dynamic constant arguments.
             if dc_args.len() != callee.num_dynamic_constants as usize {
                 return Error(format!(
-                    "Call node references {} dynamic constants, but calls a function expecting {} dynamic constants.",
+                    "Call node in {} references {} dynamic constants, but calls a function ({}) expecting {} dynamic constants.",
+                    function.name,
                     dc_args.len(),
+                    callee.name,
                     callee.num_dynamic_constants
                 ));
             }
diff --git a/hercules_opt/src/interprocedural_sroa.rs b/hercules_opt/src/interprocedural_sroa.rs
index ad4ce19e..044d3590 100644
--- a/hercules_opt/src/interprocedural_sroa.rs
+++ b/hercules_opt/src/interprocedural_sroa.rs
@@ -39,46 +39,91 @@ pub fn interprocedural_sroa(
         }
 
         let editor: &mut FunctionEditor = &mut editors[func_id.idx()];
+        let param_types = &editor.func().param_types.to_vec();
         let return_types = &editor.func().return_types.to_vec();
 
-        // We determine the new return types of the function and track a map
-        // that tells us how the old return values are constructed from the
-        // new ones
+        // We determine the new param/return types of the function and track a
+        // map that tells us how the old param/return values are constructed
+        // from the new ones.
+        let mut new_param_types = vec![];
+        let mut old_param_type_map = vec![];
         let mut new_return_types = vec![];
         let mut old_return_type_map = vec![];
         let mut changed = false;
 
-        for ret_typ in return_types.iter() {
+        for ret_typ in param_types.iter() {
             if !can_sroa_type(editor, *ret_typ) {
+                old_param_type_map.push(IndexTree::Leaf(new_param_types.len()));
+                new_param_types.push(*ret_typ);
+            } else {
+                let (types, index) = sroa_type(editor, *ret_typ, new_param_types.len());
+                old_param_type_map.push(index);
+                new_param_types.extend(types);
+                changed = true;
+            }
+        }
+
+        for par_typ in return_types.iter() {
+            if !can_sroa_type(editor, *par_typ) {
                 old_return_type_map.push(IndexTree::Leaf(new_return_types.len()));
-                new_return_types.push(*ret_typ);
+                new_return_types.push(*par_typ);
             } else {
-                let (types, index) = sroa_type(editor, *ret_typ, new_return_types.len());
+                let (types, index) = sroa_type(editor, *par_typ, new_return_types.len());
                 old_return_type_map.push(index);
                 new_return_types.extend(types);
                 changed = true;
             }
         }
 
-        // If the return type is not changed by IP SROA, skip to the next function
+        // If the param/return types aren't changed by IP SROA, skip to the next
+        // function.
         if !changed {
             continue;
         }
 
-        // Now, modify each return in the current function and the return type
-        let return_nodes = editor
-            .func()
-            .nodes
-            .iter()
-            .enumerate()
-            .filter_map(|(idx, node)| {
-                if node.try_return().is_some() {
-                    Some(NodeID::new(idx))
+        // Modify each parameter in the current function and the param types.
+        let mut param_nodes: Vec<_> = vec![vec![]; param_types.len()];
+        for id in editor.node_ids() {
+            if let Some(idx) = editor.func().nodes[id.idx()].try_parameter() {
+                param_nodes[idx].push(id);
+            }
+        }
+        println!("{}", editor.func().name);
+        let success = editor.edit(|mut edit| {
+            for (idx, ids) in param_nodes.into_iter().enumerate() {
+                let new_indices = &old_param_type_map[idx];
+                let built = if let IndexTree::Leaf(new_idx) = new_indices {
+                    edit.add_node(Node::Parameter { index: *new_idx })
                 } else {
-                    None
+                    let prod_ty = param_types[idx];
+                    let cons = edit.add_zero_constant(prod_ty);
+                    let mut cons = edit.add_node(Node::Constant { id: cons });
+                    new_indices.for_each(|idx: &Vec<Index>, param_idx: &usize| {
+                        let param = edit.add_node(Node::Parameter { index: *param_idx });
+                        cons = edit.add_node(Node::Write {
+                            collect: cons,
+                            data: param,
+                            indices: idx.clone().into_boxed_slice(),
+                        });
+                    });
+                    cons
+                };
+                for id in ids {
+                    edit = edit.replace_all_uses(id, built)?;
+                    edit = edit.delete_node(id)?;
                 }
-            })
-            .collect::<Vec<_>>();
+            }
+
+            edit.set_param_types(new_param_types);
+            Ok(edit)
+        });
+        assert!(success, "IP SROA expects to be able to edit everything, specify what functions to IP SROA via the func_selection argument");
+
+        // Modify each return in the current function and the return types.
+        let return_nodes: Vec<_> = editor
+            .node_ids()
+            .filter(|id| editor.func().nodes[id.idx()].is_return())
+            .collect();
         let success = editor.edit(|mut edit| {
             for node in return_nodes {
                 let Node::Return { control, data } = edit.get_node(node) else {
@@ -114,17 +159,15 @@ pub fn interprocedural_sroa(
             }
 
             edit.set_return_types(new_return_types);
-
             Ok(edit)
         });
         assert!(success, "IP SROA expects to be able to edit everything, specify what functions to IP SROA via the func_selection argument");
 
-        // Finally, update calls of this function
-        // In particular, we actually don't have to update the call node at all but have to update
-        // its DataProjection users
+        // Finally, update calls of this function.
         for (caller, callsite) in callsites {
             let editor = &mut editors[caller.idx()];
             assert!(editor.func_id() == caller);
+
             let projs = editor.get_users(callsite).collect::<Vec<_>>();
             for proj_id in projs {
                 let Node::DataProjection { data: _, selection } = editor.node(proj_id) else {
@@ -134,6 +177,40 @@ pub fn interprocedural_sroa(
                 let typ = types[caller.idx()][proj_id.idx()];
                 replace_returned_value(editor, proj_id, typ, new_return_info, callsite);
             }
+
+            let (control, callee, dc_args, args) =
+                editor.func().nodes[callsite.idx()].try_call().unwrap();
+            let dc_args = dc_args.clone();
+            let args = args.clone();
+            let success = editor.edit(|mut edit| {
+                let mut new_args = vec![];
+                for (idx, (data_id, update_info)) in
+                    args.iter().zip(old_param_type_map.iter()).enumerate()
+                {
+                    if let IndexTree::Leaf(new_idx) = update_info {
+                        // Unchanged parameter value
+                        assert!(new_args.len() == *new_idx);
+                        new_args.push(*data_id);
+                    } else {
+                        // SROA'd parameter value
+                        let reads = generate_reads_edit(&mut edit, param_types[idx], *data_id);
+                        reads.zip(update_info).for_each(|_, (read_id, ret_idx)| {
+                            assert!(new_args.len() == **ret_idx);
+                            new_args.push(*read_id);
+                        });
+                    }
+                }
+                let new_call = edit.add_node(Node::Call {
+                    control,
+                    function: callee,
+                    dynamic_constants: dc_args,
+                    args: new_args.into_boxed_slice(),
+                });
+                edit = edit.replace_all_uses(callsite, new_call)?;
+                edit = edit.delete_node(callsite)?;
+                Ok(edit)
+            });
+            assert!(success);
         }
     }
 }
-- 
GitLab


From 263b2e0c82483c2d51b76b9fb9ca60e5fd244940 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 28 Feb 2025 19:28:58 -0600
Subject: [PATCH 2/6] whoops

---
 hercules_opt/src/interprocedural_sroa.rs | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/hercules_opt/src/interprocedural_sroa.rs b/hercules_opt/src/interprocedural_sroa.rs
index 044d3590..c7ea5836 100644
--- a/hercules_opt/src/interprocedural_sroa.rs
+++ b/hercules_opt/src/interprocedural_sroa.rs
@@ -1,6 +1,3 @@
-use std::collections::HashMap;
-use std::iter::zip;
-
 use hercules_ir::ir::*;
 
 use crate::*;
@@ -51,24 +48,24 @@ pub fn interprocedural_sroa(
         let mut old_return_type_map = vec![];
         let mut changed = false;
 
-        for ret_typ in param_types.iter() {
-            if !can_sroa_type(editor, *ret_typ) {
+        for par_typ in param_types.iter() {
+            if !can_sroa_type(editor, *par_typ) {
                 old_param_type_map.push(IndexTree::Leaf(new_param_types.len()));
-                new_param_types.push(*ret_typ);
+                new_param_types.push(*par_typ);
             } else {
-                let (types, index) = sroa_type(editor, *ret_typ, new_param_types.len());
+                let (types, index) = sroa_type(editor, *par_typ, new_param_types.len());
                 old_param_type_map.push(index);
                 new_param_types.extend(types);
                 changed = true;
             }
         }
 
-        for par_typ in return_types.iter() {
-            if !can_sroa_type(editor, *par_typ) {
+        for ret_typ in return_types.iter() {
+            if !can_sroa_type(editor, *ret_typ) {
                 old_return_type_map.push(IndexTree::Leaf(new_return_types.len()));
-                new_return_types.push(*par_typ);
+                new_return_types.push(*ret_typ);
             } else {
-                let (types, index) = sroa_type(editor, *par_typ, new_return_types.len());
+                let (types, index) = sroa_type(editor, *ret_typ, new_return_types.len());
                 old_return_type_map.push(index);
                 new_return_types.extend(types);
                 changed = true;
-- 
GitLab


From f3ddf1d2e3b74be58efd5a434ad8f7a9f6711b20 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 28 Feb 2025 19:42:08 -0600
Subject: [PATCH 3/6] Handle outputs

---
 juno_samples/rodinia/cfd/src/cpu_euler.sch    |  4 +-
 .../rodinia/cfd/src/cpu_pre_euler.sch         |  4 +-
 juno_samples/rodinia/cfd/src/lib.rs           | 39 ++++++++++++++-----
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index 1244f80e..4cf320a6 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -14,8 +14,8 @@ simpl!(*);
 inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
 delete-uncalled(*);
 simpl!(*);
-ip-sroa[false](*);
-sroa[false](*);
+ip-sroa[true](*);
+sroa[true](*);
 predication(*);
 const-inline(*);
 simpl!(*);
diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
index 6329c504..14eb6906 100644
--- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
@@ -14,8 +14,8 @@ simpl!(*);
 inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);
 delete-uncalled(*);
 simpl!(*);
-ip-sroa[false](*);
-sroa[false](*);
+ip-sroa[true](*);
+sroa[true](*);
 predication(*);
 const-inline(*);
 simpl!(*);
diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs
index 39384c0d..e054a605 100644
--- a/juno_samples/rodinia/cfd/src/lib.rs
+++ b/juno_samples/rodinia/cfd/src/lib.rs
@@ -55,8 +55,7 @@ fn run_euler(
     let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
 
     let mut runner = runner!(euler);
-
-    HerculesMutBox::from(async_std::task::block_on(async {
+    let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async {
         runner
             .run(
                 nelr as u64,
@@ -72,9 +71,20 @@ fn run_euler(
                 ff_fc_momentum_z.to(),
             )
             .await
-    }))
-    .as_slice()
-    .to_vec()
+    });
+
+    let total = vec![];
+    let density = HerculesMutBox::from(density).as_slice();
+    let x_m = HerculesMutBox::from(x_m).as_slice();
+    let y_m = HerculesMutBox::from(y_m).as_slice();
+    let z_m = HerculesMutBox::from(z_m).as_slice();
+    let energy = HerculesMutBox::from(energy).as_slice();
+    total.extend(density.into_iter().map(|id: &mut f32| *id));
+    total.extend(x_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(y_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(z_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(energy.into_iter().map(|id: &mut f32| *id));
+    total
 }
 
 fn run_pre_euler(
@@ -114,7 +124,7 @@ fn run_pre_euler(
 
     let variables = variables.to();
 
-    HerculesMutBox::from(async_std::task::block_on(async {
+    let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async {
         runner
             .run(
                 nelr as u64,
@@ -130,9 +140,20 @@ fn run_pre_euler(
                 ff_fc_momentum_z.to(),
             )
             .await
-    }))
-    .as_slice()
-    .to_vec()
+    });
+
+    let total = vec![];
+    let density = HerculesMutBox::from(density).as_slice();
+    let x_m = HerculesMutBox::from(x_m).as_slice();
+    let y_m = HerculesMutBox::from(y_m).as_slice();
+    let z_m = HerculesMutBox::from(z_m).as_slice();
+    let energy = HerculesMutBox::from(energy).as_slice();
+    total.extend(density.into_iter().map(|id: &mut f32| *id));
+    total.extend(x_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(y_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(z_m.into_iter().map(|id: &mut f32| *id));
+    total.extend(energy.into_iter().map(|id: &mut f32| *id));
+    total
 }
 
 fn compare_float(x: f32, y: f32) -> bool {
-- 
GitLab


From cdcc0afded55bd30537366c3225e4d786e407c77 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Sat, 1 Mar 2025 13:43:19 -0600
Subject: [PATCH 4/6] Modify cfd to break up input products

---
 juno_samples/rodinia/cfd/src/lib.rs      | 229 +++++++++++++----------
 juno_samples/rodinia/cfd/src/rust_cfd.rs | 195 ++++++++++---------
 juno_samples/rodinia/cfd/src/setup.rs    | 140 ++++++++++----
 3 files changed, 335 insertions(+), 229 deletions(-)

diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs
index e054a605..62ee59f4 100644
--- a/juno_samples/rodinia/cfd/src/lib.rs
+++ b/juno_samples/rodinia/cfd/src/lib.rs
@@ -24,144 +24,180 @@ pub struct CFDInputs {
 fn run_euler(
     nelr: usize,
     iterations: usize,
-    mut variables: AlignedSlice<f32>,
+    mut variables: Variables,
     areas: &[f32],
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
+    normals: &Normals,
+    ff_variable: &Variable,
     ff_fc_density_energy: &Float3,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
-) -> Vec<f32> {
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+) -> Variables {
+    let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
+    let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
+    let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
+    let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
+    let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
+
     let areas = HerculesImmBox::from(areas);
     let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
-    let normals = HerculesImmBox::from(normals);
-    let ff_variable = HerculesImmBox::from(ff_variable);
 
-    // TODO: Make hercules box handle structs, for now we'll copy into a vec
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+    let normals_x = HerculesImmBox::from(normals.x.as_slice());
+    let normals_y = HerculesImmBox::from(normals.y.as_slice());
+    let normals_z = HerculesImmBox::from(normals.z.as_slice());
 
     let mut runner = runner!(euler);
-    let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async {
+    let (density, momentum_x, momentum_y, momentum_z, energy) = 
+    async_std::task::block_on(async {
         runner
             .run(
                 nelr as u64,
                 iterations as u64,
-                variables.to(),
+                v_density.to(),
+                v_momentum_x.to(),
+                v_momentum_y.to(),
+                v_momentum_z.to(),
+                v_energy.to(),
                 areas.to(),
                 elements_surrounding_elements.to(),
-                normals.to(),
-                ff_variable.to(),
-                ff_fc_density_energy.to(),
-                ff_fc_momentum_x.to(),
-                ff_fc_momentum_y.to(),
-                ff_fc_momentum_z.to(),
+                normals_x.to(),
+                normals_y.to(),
+                normals_z.to(),
+                ff_variable.density,
+                ff_variable.momentum.x,
+                ff_variable.momentum.y,
+                ff_variable.momentum.z,
+                ff_variable.energy,
+                ff_fc_density_energy.x,
+                ff_fc_density_energy.y,
+                ff_fc_density_energy.z,
+                ff_fc_momentum_x.x,
+                ff_fc_momentum_x.y,
+                ff_fc_momentum_x.z,
+                ff_fc_momentum_y.x,
+                ff_fc_momentum_y.y,
+                ff_fc_momentum_y.z,
+                ff_fc_momentum_z.x,
+                ff_fc_momentum_z.y,
+                ff_fc_momentum_z.z,
             )
             .await
     });
 
-    let total = vec![];
-    let density = HerculesMutBox::from(density).as_slice();
-    let x_m = HerculesMutBox::from(x_m).as_slice();
-    let y_m = HerculesMutBox::from(y_m).as_slice();
-    let z_m = HerculesMutBox::from(z_m).as_slice();
-    let energy = HerculesMutBox::from(energy).as_slice();
-    total.extend(density.into_iter().map(|id: &mut f32| *id));
-    total.extend(x_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(y_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(z_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(energy.into_iter().map(|id: &mut f32| *id));
-    total
+    Variables {
+        density: AlignedSlice::from_slice(HerculesMutBox::from(density).as_slice()),
+        momentum: Momentum {
+            x: AlignedSlice::from_slice(HerculesMutBox::from(momentum_x).as_slice()),
+            y: AlignedSlice::from_slice(HerculesMutBox::from(momentum_y).as_slice()),
+            z: AlignedSlice::from_slice(HerculesMutBox::from(momentum_z).as_slice()),
+        },
+        energy: AlignedSlice::from_slice(HerculesMutBox::from(energy).as_slice()),
+    }
 }
 
 fn run_pre_euler(
     nelr: usize,
     iterations: usize,
-    mut variables: AlignedSlice<f32>,
+    mut variables: Variables,
     areas: &[f32],
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
+    normals: &Normals,
+    ff_variable: &Variable,
     ff_fc_density_energy: &Float3,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
-) -> Vec<f32> {
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+) -> Variables {
+    let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
+    let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
+    let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
+    let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
+    let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
+
     let areas = HerculesImmBox::from(areas);
     let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
-    let normals = HerculesImmBox::from(normals);
-    let ff_variable = HerculesImmBox::from(ff_variable);
 
-    // TODO: Make hercules box handle structs, for now we'll copy into a vec
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+    let normals_x = HerculesImmBox::from(normals.x.as_slice());
+    let normals_y = HerculesImmBox::from(normals.y.as_slice());
+    let normals_z = HerculesImmBox::from(normals.z.as_slice());
 
     let mut runner = runner!(pre_euler);
-
-    let variables = variables.to();
-
-    let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async {
+    let (density, momentum_x, momentum_y, momentum_z, energy) = 
+    async_std::task::block_on(async {
         runner
             .run(
                 nelr as u64,
                 iterations as u64,
-                variables,
+                v_density.to(),
+                v_momentum_x.to(),
+                v_momentum_y.to(),
+                v_momentum_z.to(),
+                v_energy.to(),
                 areas.to(),
                 elements_surrounding_elements.to(),
-                normals.to(),
-                ff_variable.to(),
-                ff_fc_density_energy.to(),
-                ff_fc_momentum_x.to(),
-                ff_fc_momentum_y.to(),
-                ff_fc_momentum_z.to(),
+                normals_x.to(),
+                normals_y.to(),
+                normals_z.to(),
+                ff_variable.density,
+                ff_variable.momentum.x,
+                ff_variable.momentum.y,
+                ff_variable.momentum.z,
+                ff_variable.energy,
+                ff_fc_density_energy.x,
+                ff_fc_density_energy.y,
+                ff_fc_density_energy.z,
+                ff_fc_momentum_x.x,
+                ff_fc_momentum_x.y,
+                ff_fc_momentum_x.z,
+                ff_fc_momentum_y.x,
+                ff_fc_momentum_y.y,
+                ff_fc_momentum_y.z,
+                ff_fc_momentum_z.x,
+                ff_fc_momentum_z.y,
+                ff_fc_momentum_z.z,
             )
             .await
     });
 
-    let total = vec![];
-    let density = HerculesMutBox::from(density).as_slice();
-    let x_m = HerculesMutBox::from(x_m).as_slice();
-    let y_m = HerculesMutBox::from(y_m).as_slice();
-    let z_m = HerculesMutBox::from(z_m).as_slice();
-    let energy = HerculesMutBox::from(energy).as_slice();
-    total.extend(density.into_iter().map(|id: &mut f32| *id));
-    total.extend(x_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(y_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(z_m.into_iter().map(|id: &mut f32| *id));
-    total.extend(energy.into_iter().map(|id: &mut f32| *id));
-    total
+    Variables {
+        density: AlignedSlice::from_slice(HerculesMutBox::from(density).as_slice()),
+        momentum: Momentum {
+            x: AlignedSlice::from_slice(HerculesMutBox::from(momentum_x).as_slice()),
+            y: AlignedSlice::from_slice(HerculesMutBox::from(momentum_y).as_slice()),
+            z: AlignedSlice::from_slice(HerculesMutBox::from(momentum_z).as_slice()),
+        },
+        energy: AlignedSlice::from_slice(HerculesMutBox::from(energy).as_slice()),
+    }
 }
 
 fn compare_float(x: f32, y: f32) -> bool {
     (x - y).abs() < 1e-5
 }
 
-fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
-    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
+fn compare_floats(xs: &Variables, ys: &Variables) -> bool {
+    let xs_density = xs.density.as_slice();
+    let xs_momentum_x = xs.momentum.x.as_slice();
+    let xs_momentum_y = xs.momentum.y.as_slice();
+    let xs_momentum_z = xs.momentum.z.as_slice();
+    let xs_energy = xs.energy.as_slice();
+
+    let ys_density = ys.density.as_slice();
+    let ys_momentum_x = ys.momentum.x.as_slice();
+    let ys_momentum_y = ys.momentum.y.as_slice();
+    let ys_momentum_z = ys.momentum.z.as_slice();
+    let ys_energy = ys.energy.as_slice();
+
+    xs_density.len() == ys_density.len()
+        && xs_density.iter().zip(ys_density.iter()).all(|(x, y)| compare_float(*x, *y))
+        && xs_momentum_x.len() == ys_momentum_x.len()
+        && xs_momentum_x.iter().zip(ys_momentum_x.iter()).all(|(x, y)| compare_float(*x, *y))
+        && xs_momentum_y.len() == ys_momentum_y.len()
+        && xs_momentum_y.iter().zip(ys_momentum_y.iter()).all(|(x, y)| compare_float(*x, *y))
+        && xs_momentum_z.len() == ys_momentum_z.len()
+        && xs_momentum_z.iter().zip(ys_momentum_z.iter()).all(|(x, y)| compare_float(*x, *y))
+        && xs_energy.len() == ys_energy.len()
+        && xs_energy.iter().zip(ys_energy.iter()).all(|(x, y)| compare_float(*x, *y))
 }
 
 pub fn cfd_harness(args: CFDInputs) {
@@ -172,8 +208,6 @@ pub fn cfd_harness(args: CFDInputs) {
         pre_euler,
     } = args;
 
-    assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16");
-
     let FarFieldConditions {
         ff_variable,
         ff_fc_momentum_x,
@@ -189,7 +223,7 @@ pub fn cfd_harness(args: CFDInputs) {
         normals,
     } = read_domain_geometry(data_file, block_size);
 
-    let variables = initialize_variables(nelr, ff_variable.as_slice());
+    let variables = initialize_variables(nelr, &ff_variable);
 
     let res_juno = if pre_euler {
         run_pre_euler(
@@ -198,8 +232,8 @@ pub fn cfd_harness(args: CFDInputs) {
             variables.clone(),
             areas.as_slice(),
             elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
+            &normals,
+            &ff_variable,
             &ff_fc_density_energy,
             &ff_fc_momentum_x,
             &ff_fc_momentum_y,
@@ -212,8 +246,8 @@ pub fn cfd_harness(args: CFDInputs) {
             variables.clone(),
             areas.as_slice(),
             elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
+            &normals,
+            &ff_variable,
             &ff_fc_density_energy,
             &ff_fc_momentum_x,
             &ff_fc_momentum_y,
@@ -227,8 +261,8 @@ pub fn cfd_harness(args: CFDInputs) {
             variables,
             areas.as_slice(),
             elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
+            &normals,
+            &ff_variable,
             &ff_fc_density_energy,
             &ff_fc_momentum_x,
             &ff_fc_momentum_y,
@@ -241,8 +275,8 @@ pub fn cfd_harness(args: CFDInputs) {
             variables,
             areas.as_slice(),
             elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
+            &normals,
+            &ff_variable,
             &ff_fc_density_energy,
             &ff_fc_momentum_x,
             &ff_fc_momentum_y,
@@ -250,8 +284,7 @@ pub fn cfd_harness(args: CFDInputs) {
         )
     };
 
-    if !compare_floats(&res_juno, res_rust.as_slice()) {
-        assert_eq!(res_juno.len(), res_rust.as_slice().len());
+    if !compare_floats(&res_juno, &res_rust) {
         panic!("Mismatch in results");
     }
 }
diff --git a/juno_samples/rodinia/cfd/src/rust_cfd.rs b/juno_samples/rodinia/cfd/src/rust_cfd.rs
index 42479df8..6d92c2b5 100644
--- a/juno_samples/rodinia/cfd/src/rust_cfd.rs
+++ b/juno_samples/rodinia/cfd/src/rust_cfd.rs
@@ -58,19 +58,19 @@ fn compute_speed_of_sound(density: f32, pressure: f32) -> f32 {
     (GAMMA * pressure / density).sqrt()
 }
 
-fn compute_step_factor(nelr: usize, variables: &[f32], areas: &[f32]) -> Vec<f32> {
+fn compute_step_factor(nelr: usize, variables: &Variables, areas: &[f32]) -> Vec<f32> {
     let mut step_factors = vec![0.0; nelr];
 
     for i in 0..nelr {
-        let density = variables[i + VAR_DENSITY * nelr];
+        let density = variables.density[i];
 
         let momentum = Float3 {
-            x: variables[i + (VAR_MOMENTUM + 0) * nelr],
-            y: variables[i + (VAR_MOMENTUM + 1) * nelr],
-            z: variables[i + (VAR_MOMENTUM + 2) * nelr],
+            x: variables.momentum.x[i],
+            y: variables.momentum.y[i],
+            z: variables.momentum.z[i],
         };
 
-        let density_energy = variables[i + VAR_DENSITY_ENERGY * nelr];
+        let density_energy = variables.energy[i];
 
         let velocity = compute_velocity(density, &momentum);
         let speed_sqd = compute_speed_sqd(&velocity);
@@ -85,28 +85,36 @@ fn compute_step_factor(nelr: usize, variables: &[f32], areas: &[f32]) -> Vec<f32
 
 fn compute_flux_euler(
     nelr: usize,
-    variables: &[f32],
+    variables: &Variables,
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
+    normals: &Normals,
+    ff_variable: &Variable,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
     ff_fc_density_energy: &Float3,
-) -> Vec<f32> {
+) -> Variables {
     let smoothing_coefficient: f32 = 0.2;
 
-    let mut fluxes = vec![0.0; nelr * NVAR];
+    let mut fluxes = Variables {
+        density: AlignedSlice::of_len(nelr),
+        momentum: Momentum {
+            x: AlignedSlice::of_len(nelr),
+            y: AlignedSlice::of_len(nelr),
+            z: AlignedSlice::of_len(nelr),
+        },
+        energy: AlignedSlice::of_len(nelr),
+    };
 
     for i in 0..nelr {
-        let density_i = variables[i + VAR_DENSITY * nelr];
+        let density_i = variables.density[i];
         let momentum_i = Float3 {
-            x: variables[i + (VAR_MOMENTUM + 0) * nelr],
-            y: variables[i + (VAR_MOMENTUM + 1) * nelr],
-            z: variables[i + (VAR_MOMENTUM + 2) * nelr],
+            x: variables.momentum.x[i],
+            y: variables.momentum.y[i],
+            z: variables.momentum.z[i],
         };
 
-        let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr];
+        let density_energy_i = variables.energy[i];
 
         let velocity_i = compute_velocity(density_i, &momentum_i);
         let speed_sqd_i = compute_speed_sqd(&velocity_i);
@@ -134,9 +142,9 @@ fn compute_flux_euler(
         for j in 0..NNB {
             let nb = elements_surrounding_elements[i + j * nelr];
             let normal = Float3 {
-                x: normals[i + (j + 0 * NNB) * nelr],
-                y: normals[i + (j + 1 * NNB) * nelr],
-                z: normals[i + (j + 2 * NNB) * nelr],
+                x: normals.x[i + j * nelr],
+                y: normals.y[i + j * nelr],
+                z: normals.z[i + j * nelr],
             };
             let normal_len =
                 (normal.x * normal.x + normal.y * normal.y + normal.z * normal.z).sqrt();
@@ -144,13 +152,13 @@ fn compute_flux_euler(
             if nb >= 0 {
                 // legitimate neighbor
                 let nb = nb as usize;
-                let density_nb = variables[nb + VAR_DENSITY * nelr];
+                let density_nb = variables.density[nb];
                 let momentum_nb = Float3 {
-                    x: variables[nb + (VAR_MOMENTUM + 0) * nelr],
-                    y: variables[nb + (VAR_MOMENTUM + 1) * nelr],
-                    z: variables[nb + (VAR_MOMENTUM + 2) * nelr],
+                    x: variables.momentum.x[nb],
+                    y: variables.momentum.y[nb],
+                    z: variables.momentum.z[nb],
                 };
-                let density_energy_nb = variables[nb + VAR_DENSITY_ENERGY * nelr];
+                let density_energy_nb = variables.energy[nb];
                 let velocity_nb = compute_velocity(density_nb, &momentum_nb);
                 let speed_sqd_nb = compute_speed_sqd(&velocity_nb);
                 let pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb);
@@ -205,21 +213,21 @@ fn compute_flux_euler(
             } else if nb == -2 {
                 // a far field boundary
                 let factor = 0.5 * normal.x;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 0] + momentum_i.x);
+                flux_i_density += factor * (ff_variable.momentum.x + momentum_i.x);
                 flux_i_density_energy += factor * (ff_fc_density_energy.x + fc_i_density_energy.x);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.x + fc_i_momentum_x.x);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.x + fc_i_momentum_y.x);
                 flux_i_momentum.z += factor * (ff_fc_momentum_z.x + fc_i_momentum_z.x);
 
                 let factor = 0.5 * normal.y;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 1] + momentum_i.y);
+                flux_i_density += factor * (ff_variable.momentum.y + momentum_i.y);
                 flux_i_density_energy += factor * (ff_fc_density_energy.y + fc_i_density_energy.y);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.y + fc_i_momentum_x.y);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.y + fc_i_momentum_y.y);
                 flux_i_momentum.z += factor * (ff_fc_momentum_z.y + fc_i_momentum_z.y);
 
                 let factor = 0.5 * normal.z;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 2] + momentum_i.z);
+                flux_i_density += factor * (ff_variable.momentum.z + momentum_i.z);
                 flux_i_density_energy += factor * (ff_fc_density_energy.z + fc_i_density_energy.z);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.z + fc_i_momentum_x.z);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.z + fc_i_momentum_y.z);
@@ -227,11 +235,11 @@ fn compute_flux_euler(
             }
         }
 
-        fluxes[i + VAR_DENSITY * nelr] = flux_i_density;
-        fluxes[i + (VAR_MOMENTUM + 0) * nelr] = flux_i_momentum.x;
-        fluxes[i + (VAR_MOMENTUM + 1) * nelr] = flux_i_momentum.y;
-        fluxes[i + (VAR_MOMENTUM + 2) * nelr] = flux_i_momentum.z;
-        fluxes[i + VAR_DENSITY_ENERGY * nelr] = flux_i_density_energy;
+        fluxes.density[i] = flux_i_density;
+        fluxes.momentum.x[i] = flux_i_momentum.x;
+        fluxes.momentum.y[i] = flux_i_momentum.y;
+        fluxes.momentum.z[i] = flux_i_momentum.z;
+        fluxes.energy[i] = flux_i_density_energy;
     }
 
     fluxes
@@ -239,7 +247,7 @@ fn compute_flux_euler(
 
 fn compute_flux_contributions(
     nelr: usize,
-    variables: &[f32],
+    variables: &Variables,
 ) -> (Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
     let mut fc_momentum_x = vec![0.0; nelr * NDIM];
     let mut fc_momentum_y = vec![0.0; nelr * NDIM];
@@ -247,14 +255,14 @@ fn compute_flux_contributions(
     let mut fc_density_energy = vec![0.0; nelr * NDIM];
 
     for i in 0..nelr {
-        let density_i = variables[i + VAR_DENSITY * nelr];
+        let density_i = variables.density[i];
         let momentum_i = Float3 {
-            x: variables[i + (VAR_MOMENTUM + 0) * nelr],
-            y: variables[i + (VAR_MOMENTUM + 1) * nelr],
-            z: variables[i + (VAR_MOMENTUM + 2) * nelr],
+            x: variables.momentum.x[i],
+            y: variables.momentum.y[i],
+            z: variables.momentum.z[i],
         };
 
-        let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr];
+        let density_energy_i = variables.energy[i];
 
         let velocity_i = compute_velocity(density_i, &momentum_i);
         let speed_sqd_i = compute_speed_sqd(&velocity_i);
@@ -298,32 +306,40 @@ fn compute_flux_contributions(
 
 fn compute_flux_pre_euler(
     nelr: usize,
-    variables: &[f32],
+    variables: &Variables,
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
+    normals: &Normals,
     fc_momentum_x: &[f32],
     fc_momentum_y: &[f32],
     fc_momentum_z: &[f32],
     fc_density_energy: &[f32],
-    ff_variable: &[f32],
+    ff_variable: &Variable,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
     ff_fc_density_energy: &Float3,
-) -> Vec<f32> {
+) -> Variables {
     let smoothing_coefficient: f32 = 0.2;
 
-    let mut fluxes = vec![0.0; nelr * NVAR];
+    let mut fluxes = Variables {
+        density: AlignedSlice::of_len(nelr),
+        momentum: Momentum {
+            x: AlignedSlice::of_len(nelr),
+            y: AlignedSlice::of_len(nelr),
+            z: AlignedSlice::of_len(nelr),
+        },
+        energy: AlignedSlice::of_len(nelr),
+    };
 
     for i in 0..nelr {
-        let density_i = variables[i + VAR_DENSITY * nelr];
+        let density_i = variables.density[i];
         let momentum_i = Float3 {
-            x: variables[i + (VAR_MOMENTUM + 0) * nelr],
-            y: variables[i + (VAR_MOMENTUM + 1) * nelr],
-            z: variables[i + (VAR_MOMENTUM + 2) * nelr],
+            x: variables.momentum.x[i],
+            y: variables.momentum.y[i],
+            z: variables.momentum.z[i],
         };
 
-        let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr];
+        let density_energy_i = variables.energy[i];
 
         let velocity_i = compute_velocity(density_i, &momentum_i);
         let speed_sqd_i = compute_speed_sqd(&velocity_i);
@@ -363,22 +379,22 @@ fn compute_flux_pre_euler(
         for j in 0..NNB {
             let nb = elements_surrounding_elements[i + j * nelr];
             let normal = Float3 {
-                x: normals[i + (j + 0 * NNB) * nelr],
-                y: normals[i + (j + 1 * NNB) * nelr],
-                z: normals[i + (j + 2 * NNB) * nelr],
+                x: normals.x[i + j * nelr],
+                y: normals.y[i + j * nelr],
+                z: normals.z[i + j * nelr],
             };
             let normal_len =
                 (normal.x * normal.x + normal.y * normal.y + normal.z * normal.z).sqrt();
 
             if nb >= 0 {
                 let nb = nb as usize;
-                let density_nb = variables[nb + VAR_DENSITY * nelr];
+                let density_nb = variables.density[nb];
                 let momentum_nb = Float3 {
-                    x: variables[nb + (VAR_MOMENTUM + 0) * nelr],
-                    y: variables[nb + (VAR_MOMENTUM + 1) * nelr],
-                    z: variables[nb + (VAR_MOMENTUM + 2) * nelr],
+                    x: variables.momentum.x[nb],
+                    y: variables.momentum.y[nb],
+                    z: variables.momentum.z[nb],
                 };
-                let density_energy_nb = variables[nb + VAR_DENSITY_ENERGY * nelr];
+                let density_energy_nb = variables.energy[nb];
                 let velocity_nb = compute_velocity(density_nb, &momentum_nb);
                 let speed_sqd_nb = compute_speed_sqd(&velocity_nb);
                 let pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb);
@@ -443,21 +459,21 @@ fn compute_flux_pre_euler(
                 flux_i_momentum.z += normal.z * pressure_i;
             } else if nb == -2 {
                 let factor = 0.5 * normal.x;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 0] + momentum_i.x);
+                flux_i_density += factor * (ff_variable.momentum.x + momentum_i.x);
                 flux_i_density_energy += factor * (ff_fc_density_energy.x + fc_i_density_energy.x);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.x + fc_i_momentum_x.x);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.x + fc_i_momentum_y.x);
                 flux_i_momentum.z += factor * (ff_fc_momentum_z.x + fc_i_momentum_z.x);
 
                 let factor = 0.5 * normal.y;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 1] + momentum_i.y);
+                flux_i_density += factor * (ff_variable.momentum.y + momentum_i.y);
                 flux_i_density_energy += factor * (ff_fc_density_energy.y + fc_i_density_energy.y);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.y + fc_i_momentum_x.y);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.y + fc_i_momentum_y.y);
                 flux_i_momentum.z += factor * (ff_fc_momentum_z.y + fc_i_momentum_z.y);
 
                 let factor = 0.5 * normal.z;
-                flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 2] + momentum_i.z);
+                flux_i_density += factor * (ff_variable.momentum.z + momentum_i.z);
                 flux_i_density_energy += factor * (ff_fc_density_energy.z + fc_i_density_energy.z);
                 flux_i_momentum.x += factor * (ff_fc_momentum_x.z + fc_i_momentum_x.z);
                 flux_i_momentum.y += factor * (ff_fc_momentum_y.z + fc_i_momentum_y.z);
@@ -465,11 +481,11 @@ fn compute_flux_pre_euler(
             }
         }
 
-        fluxes[i + VAR_DENSITY * nelr] = flux_i_density;
-        fluxes[i + (VAR_MOMENTUM + 0) * nelr] = flux_i_momentum.x;
-        fluxes[i + (VAR_MOMENTUM + 1) * nelr] = flux_i_momentum.y;
-        fluxes[i + (VAR_MOMENTUM + 2) * nelr] = flux_i_momentum.z;
-        fluxes[i + VAR_DENSITY_ENERGY * nelr] = flux_i_density_energy;
+        fluxes.density[i] = flux_i_density;
+        fluxes.momentum.x[i] = flux_i_momentum.x;
+        fluxes.momentum.y[i] = flux_i_momentum.y;
+        fluxes.momentum.z[i] = flux_i_momentum.z;
+        fluxes.energy[i] = flux_i_density_energy;
     }
 
     fluxes
@@ -478,47 +494,42 @@ fn compute_flux_pre_euler(
 fn time_step(
     j: usize,
     nelr: usize,
-    old_variables: &[f32],
-    variables: &mut AlignedSlice<f32>,
+    old_variables: &Variables,
+    variables: &mut Variables,
     step_factors: &[f32],
-    fluxes: &[f32],
+    fluxes: &Variables,
 ) {
     for i in 0..nelr {
         let factor = step_factors[i] / (RK + 1 - j) as f32;
-        variables[i + VAR_DENSITY * nelr] =
-            old_variables[i + VAR_DENSITY * nelr] + factor * fluxes[i + VAR_DENSITY * nelr];
-        variables[i + (VAR_MOMENTUM + 0) * nelr] = old_variables[i + (VAR_MOMENTUM + 0) * nelr]
-            + factor * fluxes[i + (VAR_MOMENTUM + 0) * nelr];
-        variables[i + (VAR_MOMENTUM + 1) * nelr] = old_variables[i + (VAR_MOMENTUM + 1) * nelr]
-            + factor * fluxes[i + (VAR_MOMENTUM + 1) * nelr];
-        variables[i + (VAR_MOMENTUM + 2) * nelr] = old_variables[i + (VAR_MOMENTUM + 2) * nelr]
-            + factor * fluxes[i + (VAR_MOMENTUM + 2) * nelr];
-        variables[i + VAR_DENSITY_ENERGY * nelr] = old_variables[i + VAR_DENSITY_ENERGY * nelr]
-            + factor * fluxes[i + VAR_DENSITY_ENERGY * nelr];
+        variables.density[i] = old_variables.density[i] + factor * fluxes.density[i];
+        variables.momentum.x[i] = old_variables.momentum.x[i] + factor * fluxes.momentum.x[i];
+        variables.momentum.y[i] = old_variables.momentum.y[i] + factor * fluxes.momentum.y[i];
+        variables.momentum.z[i] = old_variables.momentum.z[i] + factor * fluxes.momentum.z[i];
+        variables.energy[i] = old_variables.energy[i] + factor * fluxes.energy[i];
     }
 }
 
 pub fn euler(
     nelr: usize,
     iterations: usize,
-    mut variables: AlignedSlice<f32>,
+    mut variables: Variables,
     areas: &[f32],
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
+    normals: &Normals,
+    ff_variable: &Variable,
     ff_fc_density_energy: &Float3,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
-) -> AlignedSlice<f32> {
+) -> Variables {
     for i in 0..iterations {
         let old_variables = variables.clone();
-        let step_factors = compute_step_factor(nelr, variables.as_slice(), areas);
+        let step_factors = compute_step_factor(nelr, &variables, areas);
 
         for j in 0..RK {
             let fluxes = compute_flux_euler(
                 nelr,
-                variables.as_slice(),
+                &variables,
                 elements_surrounding_elements,
                 normals,
                 ff_variable,
@@ -530,7 +541,7 @@ pub fn euler(
             time_step(
                 j,
                 nelr,
-                old_variables.as_slice(),
+                &old_variables,
                 &mut variables,
                 &step_factors,
                 &fluxes,
@@ -544,26 +555,26 @@ pub fn euler(
 pub fn pre_euler(
     nelr: usize,
     iterations: usize,
-    mut variables: AlignedSlice<f32>,
+    mut variables: Variables,
     areas: &[f32],
     elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
+    normals: &Normals,
+    ff_variable: &Variable,
     ff_fc_density_energy: &Float3,
     ff_fc_momentum_x: &Float3,
     ff_fc_momentum_y: &Float3,
     ff_fc_momentum_z: &Float3,
-) -> AlignedSlice<f32> {
+) -> Variables {
     for i in 0..iterations {
         let old_variables = variables.clone();
-        let step_factors = compute_step_factor(nelr, variables.as_slice(), areas);
+        let step_factors = compute_step_factor(nelr, &variables, areas);
 
         for j in 0..RK {
             let (fc_momentum_x, fc_momentum_y, fc_momentum_z, fc_density_energy) =
-                compute_flux_contributions(nelr, variables.as_slice());
+                compute_flux_contributions(nelr, &variables);
             let fluxes = compute_flux_pre_euler(
                 nelr,
-                variables.as_slice(),
+                &variables,
                 elements_surrounding_elements,
                 normals,
                 &fc_momentum_x,
@@ -579,7 +590,7 @@ pub fn pre_euler(
             time_step(
                 j,
                 nelr,
-                old_variables.as_slice(),
+                &old_variables,
                 &mut variables,
                 &step_factors,
                 &fluxes,
diff --git a/juno_samples/rodinia/cfd/src/setup.rs b/juno_samples/rodinia/cfd/src/setup.rs
index b996f057..0da00251 100644
--- a/juno_samples/rodinia/cfd/src/setup.rs
+++ b/juno_samples/rodinia/cfd/src/setup.rs
@@ -1,10 +1,10 @@
+use std::fmt::{Debug, Error, Formatter};
 use std::fs::File;
 use std::io::Read;
+use std::ops::{Index, IndexMut};
 use std::path::Path;
 use std::str::FromStr;
 
-use std::ops::{Index, IndexMut};
-
 use nom::Parser;
 
 use crate::rust_cfd;
@@ -14,10 +14,6 @@ pub const ff_mach: f32 = 1.2;
 pub const NDIM: usize = 3;
 pub const NNB: usize = 4;
 pub const RK: usize = 3;
-pub const VAR_DENSITY: usize = 0;
-pub const VAR_MOMENTUM: usize = 1;
-pub const VAR_DENSITY_ENERGY: usize = VAR_MOMENTUM + NDIM;
-pub const NVAR: usize = VAR_DENSITY_ENERGY + 1;
 pub const deg_angle_of_attack: f32 = 0.0;
 
 #[repr(align(64))]
@@ -31,6 +27,15 @@ pub struct AlignedSlice<T> {
     length: usize,
 }
 
+impl<T> Debug for AlignedSlice<T>
+where
+    T: Debug
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
+        write!(f, "{:?}", self.as_slice())
+    }
+}
+
 impl<T> AlignedSlice<T>
 where
     T: Default,
@@ -53,6 +58,17 @@ where
     }
 }
 
+impl<T> AlignedSlice<T>
+where
+    T: Clone + Default,
+{
+    pub fn from_slice(slice: &[T]) -> Self {
+        let mut res = Self::of_len(slice.len());
+        res.as_mut_slice().clone_from_slice(slice);
+        res
+    }
+}
+
 impl<T> AlignedSlice<T> {
     pub fn as_slice(&self) -> &[T] {
         &self.slice[self.offset..self.offset + self.length]
@@ -90,15 +106,20 @@ where
     }
 }
 
-#[repr(C)]
 pub struct Float3 {
     pub x: f32,
     pub y: f32,
     pub z: f32,
 }
 
+pub struct Variable {
+    pub density: f32,
+    pub momentum: Float3,
+    pub energy: f32,
+}
+
 pub struct FarFieldConditions {
-    pub ff_variable: AlignedSlice<f32>,
+    pub ff_variable: Variable,
     pub ff_fc_momentum_x: Float3,
     pub ff_fc_momentum_y: Float3,
     pub ff_fc_momentum_z: Float3,
@@ -106,14 +127,14 @@ pub struct FarFieldConditions {
 }
 
 pub fn set_far_field_conditions() -> FarFieldConditions {
-    let mut ff_variable: AlignedSlice<f32> = AlignedSlice::of_len(NVAR);
+    let mut ff_variable = Variable { density: 0.0, momentum: Float3 { x: 0.0, y: 0.0, z: 0.0 }, energy: 0.0 };
 
     let angle_of_attack = std::f32::consts::PI / 180.0 * deg_angle_of_attack;
 
-    ff_variable[VAR_DENSITY] = 1.4;
+    ff_variable.density = 1.4;
 
     let ff_pressure = 1.0;
-    let ff_speed_of_sound = (GAMMA * ff_pressure / ff_variable[VAR_DENSITY]).sqrt();
+    let ff_speed_of_sound = (GAMMA * ff_pressure / ff_variable.density).sqrt();
     let ff_speed = ff_mach * ff_speed_of_sound;
 
     let ff_velocity = Float3 {
@@ -122,23 +143,23 @@ pub fn set_far_field_conditions() -> FarFieldConditions {
         z: 0.0,
     };
 
-    ff_variable[VAR_MOMENTUM + 0] = ff_variable[VAR_DENSITY] * ff_velocity.x;
-    ff_variable[VAR_MOMENTUM + 1] = ff_variable[VAR_DENSITY] * ff_velocity.y;
-    ff_variable[VAR_MOMENTUM + 2] = ff_variable[VAR_DENSITY] * ff_velocity.z;
+    ff_variable.momentum.x = ff_variable.density * ff_velocity.x;
+    ff_variable.momentum.y = ff_variable.density * ff_velocity.y;
+    ff_variable.momentum.z = ff_variable.density * ff_velocity.z;
 
-    ff_variable[VAR_DENSITY_ENERGY] =
-        ff_variable[VAR_DENSITY] * (0.5 * (ff_speed * ff_speed)) + (ff_pressure / (GAMMA - 1.0));
+    ff_variable.energy =
+        ff_variable.density * (0.5 * (ff_speed * ff_speed)) + (ff_pressure / (GAMMA - 1.0));
 
     let ff_momentum = Float3 {
-        x: ff_variable[VAR_MOMENTUM + 0],
-        y: ff_variable[VAR_MOMENTUM + 1],
-        z: ff_variable[VAR_MOMENTUM + 2],
+        x: ff_variable.momentum.x,
+        y: ff_variable.momentum.y,
+        z: ff_variable.momentum.z,
     };
     let (ff_fc_momentum_x, ff_fc_momentum_y, ff_fc_momentum_z, ff_fc_density_energy) =
         rust_cfd::compute_flux_contribution(
-            ff_variable[VAR_DENSITY],
+            ff_variable.density,
             &ff_momentum,
-            ff_variable[VAR_DENSITY_ENERGY],
+            ff_variable.energy,
             ff_pressure,
             &ff_velocity,
         );
@@ -152,11 +173,17 @@ pub fn set_far_field_conditions() -> FarFieldConditions {
     }
 }
 
+pub struct Normals {
+    pub x: AlignedSlice<f32>,
+    pub y: AlignedSlice<f32>,
+    pub z: AlignedSlice<f32>,
+}
+
 pub struct GeometryData {
     pub nelr: usize,
     pub areas: AlignedSlice<f32>,
     pub elements_surrounding_elements: AlignedSlice<i32>,
-    pub normals: AlignedSlice<f32>,
+    pub normals: Normals,
 }
 
 pub fn read_domain_geometry<P>(path: P, block_size: usize) -> GeometryData
@@ -183,7 +210,11 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo
 
     let mut areas: AlignedSlice<f32> = AlignedSlice::of_len(nelr);
     let mut elements_surrounding_elements: AlignedSlice<i32> = AlignedSlice::of_len(nelr * NNB);
-    let mut normals: AlignedSlice<f32> = AlignedSlice::of_len(NDIM * NNB * nelr);
+    let mut normals = Normals {
+        x: AlignedSlice::of_len(NNB * nelr),
+        y: AlignedSlice::of_len(NNB * nelr),
+        z: AlignedSlice::of_len(NNB * nelr),
+    };
 
     // read in data
     let mut text = text;
@@ -209,14 +240,21 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo
             let val = i32::from_str(val).unwrap();
             let val = if neg { -val } else { val };
             elements_surrounding_elements[i + j * nelr] = if val < 0 { -1 } else { val } - 1; // it's coming in with Fortran numbering
-
-            for k in 0..NDIM {
-                let t = nom::character::complete::multispace0(text)?.0;
-                let (t, val) = nom::number::complete::float(t)?;
-                text = t;
-
-                normals[i + (j + k * NNB) * nelr] = -val;
-            }
+                
+            let t = nom::character::complete::multispace0(text)?.0;
+            let (t, val) = nom::number::complete::float(t)?;
+            text = t;
+            normals.x[i + j * nelr] = -val;
+                
+            let t = nom::character::complete::multispace0(text)?.0;
+            let (t, val) = nom::number::complete::float(t)?;
+            text = t;
+            normals.y[i + j * nelr] = -val;
+                
+            let t = nom::character::complete::multispace0(text)?.0;
+            let (t, val) = nom::number::complete::float(t)?;
+            text = t;
+            normals.z[i + j * nelr] = -val;
         }
     }
 
@@ -227,9 +265,9 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo
         for j in 0..NNB {
             elements_surrounding_elements[i + j * nelr] =
                 elements_surrounding_elements[last + j * nelr];
-            for k in 0..NDIM {
-                normals[i + (j + k * NNB) * nelr] = normals[last + (j + k * NNB) * nelr];
-            }
+            normals.x[i + j * nelr] = normals.x[last + j * nelr];
+            normals.y[i + j * nelr] = normals.y[last + j * nelr];
+            normals.z[i + j * nelr] = normals.z[last + j * nelr];
         }
     }
 
@@ -245,13 +283,37 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo
     ))
 }
 
-pub fn initialize_variables(nelr: usize, ff_variable: &[f32]) -> AlignedSlice<f32> {
-    let mut variables = AlignedSlice::of_len(nelr * NVAR);
+#[derive(Clone)]
+pub struct Momentum {
+    pub x: AlignedSlice<f32>,
+    pub y: AlignedSlice<f32>,
+    pub z: AlignedSlice<f32>,
+}
+
+#[derive(Clone)]
+pub struct Variables {
+    pub density: AlignedSlice<f32>,
+    pub momentum: Momentum,
+    pub energy: AlignedSlice<f32>,
+}
+
+pub fn initialize_variables(nelr: usize, ff_variable: &Variable) -> Variables {
+    let mut variables = Variables {
+        density: AlignedSlice::of_len(nelr),
+        momentum: Momentum {
+            x: AlignedSlice::of_len(nelr),
+            y: AlignedSlice::of_len(nelr),
+            z: AlignedSlice::of_len(nelr),
+        },
+        energy: AlignedSlice::of_len(nelr),
+    };
 
     for i in 0..nelr {
-        for j in 0..NVAR {
-            variables[i + j * nelr] = ff_variable[j];
-        }
+        variables.density[i] = ff_variable.density;
+        variables.momentum.x[i] = ff_variable.momentum.x;
+        variables.momentum.y[i] = ff_variable.momentum.y;
+        variables.momentum.z[i] = ff_variable.momentum.z;
+        variables.energy[i] = ff_variable.energy;
     }
 
     variables
-- 
GitLab


From 1dd164a4f31255db25830a49d1d047901963fe4c Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Sat, 1 Mar 2025 13:47:58 -0600
Subject: [PATCH 5/6] Fix cfd bench

---
 juno_samples/rodinia/cfd/benches/cfd_bench.rs | 122 +++++++++++-------
 1 file changed, 76 insertions(+), 46 deletions(-)

diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
index fd614b42..5fc73db9 100644
--- a/juno_samples/rodinia/cfd/benches/cfd_bench.rs
+++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
@@ -28,40 +28,55 @@ fn cfd_bench(c: &mut Criterion) {
         elements_surrounding_elements,
         normals,
     } = read_domain_geometry(data_file, block_size);
-    let mut variables = initialize_variables(nelr, ff_variable.as_slice());
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let mut variables = initialize_variables(nelr, &ff_variable);
+
+    let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
+    let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
+    let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
+    let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
+    let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
+
     let areas = HerculesImmBox::from(areas.as_slice());
     let elements_surrounding_elements =
         HerculesImmBox::from(elements_surrounding_elements.as_slice());
-    let normals = HerculesImmBox::from(normals.as_slice());
-    let ff_variable = HerculesImmBox::from(ff_variable.as_slice());
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+
+    let normals_x = HerculesImmBox::from(normals.x.as_slice());
+    let normals_y = HerculesImmBox::from(normals.y.as_slice());
+    let normals_z = HerculesImmBox::from(normals.z.as_slice());
+
     group.bench_function("cfd bench euler", |b| {
         b.iter(|| {
             async_std::task::block_on(async {
                 r.run(
                     nelr as u64,
                     iterations as u64,
-                    variables.to(),
+                    v_density.to(),
+                    v_momentum_x.to(),
+                    v_momentum_y.to(),
+                    v_momentum_z.to(),
+                    v_energy.to(),
                     areas.to(),
                     elements_surrounding_elements.to(),
-                    normals.to(),
-                    ff_variable.to(),
-                    ff_fc_density_energy.to(),
-                    ff_fc_momentum_x.to(),
-                    ff_fc_momentum_y.to(),
-                    ff_fc_momentum_z.to(),
+                    normals_x.to(),
+                    normals_y.to(),
+                    normals_z.to(),
+                    ff_variable.density,
+                    ff_variable.momentum.x,
+                    ff_variable.momentum.y,
+                    ff_variable.momentum.z,
+                    ff_variable.energy,
+                    ff_fc_density_energy.x,
+                    ff_fc_density_energy.y,
+                    ff_fc_density_energy.z,
+                    ff_fc_momentum_x.x,
+                    ff_fc_momentum_x.y,
+                    ff_fc_momentum_x.z,
+                    ff_fc_momentum_y.x,
+                    ff_fc_momentum_y.y,
+                    ff_fc_momentum_y.z,
+                    ff_fc_momentum_z.x,
+                    ff_fc_momentum_z.y,
+                    ff_fc_momentum_z.z,
                 )
                 .await
             });
@@ -85,40 +100,55 @@ fn cfd_bench(c: &mut Criterion) {
         elements_surrounding_elements,
         normals,
     } = read_domain_geometry(data_file, block_size);
-    let mut variables = initialize_variables(nelr, ff_variable.as_slice());
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let mut variables = initialize_variables(nelr, &ff_variable);
+
+    let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice());
+    let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice());
+    let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice());
+    let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice());
+    let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice());
+
     let areas = HerculesImmBox::from(areas.as_slice());
     let elements_surrounding_elements =
         HerculesImmBox::from(elements_surrounding_elements.as_slice());
-    let normals = HerculesImmBox::from(normals.as_slice());
-    let ff_variable = HerculesImmBox::from(ff_variable.as_slice());
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+
+    let normals_x = HerculesImmBox::from(normals.x.as_slice());
+    let normals_y = HerculesImmBox::from(normals.y.as_slice());
+    let normals_z = HerculesImmBox::from(normals.z.as_slice());
+
     group.bench_function("cfd bench pre-euler", |b| {
         b.iter(|| {
             async_std::task::block_on(async {
                 r.run(
                     nelr as u64,
                     iterations as u64,
-                    variables.to(),
+                    v_density.to(),
+                    v_momentum_x.to(),
+                    v_momentum_y.to(),
+                    v_momentum_z.to(),
+                    v_energy.to(),
                     areas.to(),
                     elements_surrounding_elements.to(),
-                    normals.to(),
-                    ff_variable.to(),
-                    ff_fc_density_energy.to(),
-                    ff_fc_momentum_x.to(),
-                    ff_fc_momentum_y.to(),
-                    ff_fc_momentum_z.to(),
+                    normals_x.to(),
+                    normals_y.to(),
+                    normals_z.to(),
+                    ff_variable.density,
+                    ff_variable.momentum.x,
+                    ff_variable.momentum.y,
+                    ff_variable.momentum.z,
+                    ff_variable.energy,
+                    ff_fc_density_energy.x,
+                    ff_fc_density_energy.y,
+                    ff_fc_density_energy.z,
+                    ff_fc_momentum_x.x,
+                    ff_fc_momentum_x.y,
+                    ff_fc_momentum_x.z,
+                    ff_fc_momentum_y.x,
+                    ff_fc_momentum_y.y,
+                    ff_fc_momentum_y.z,
+                    ff_fc_momentum_z.x,
+                    ff_fc_momentum_z.y,
+                    ff_fc_momentum_z.z,
                 )
                 .await
             });
-- 
GitLab


From 5fb96a38840c87d68d131832ac18cd68cc297078 Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Sat, 1 Mar 2025 14:06:37 -0600
Subject: [PATCH 6/6] Fix gpu schedule

---
 juno_samples/rodinia/cfd/src/gpu_euler.sch    | 47 ++++++++++++-------
 .../rodinia/cfd/src/gpu_pre_euler.sch         | 45 +++++++++++-------
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch
index 7f7ee42c..aed6115e 100644
--- a/juno_samples/rodinia/cfd/src/gpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch
@@ -1,23 +1,34 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(euler);
-gpu(auto.euler);
-
-inline(auto.euler);
-inline(auto.euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux, time_step);
 
-sroa[false](auto.euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);
+ip-sroa[true](*);
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
+no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
+parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop);
 
+unforkify(*);
+float-collections(*);
 gcm(*);
-
diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
index 33c46dab..d91f1b00 100644
--- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch
@@ -1,23 +1,32 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(pre_euler);
-gpu(auto.pre_euler);
-
-inline(auto.pre_euler);
-inline(auto.pre_euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);
 delete-uncalled(*);
+gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step);
 
-sroa[false](auto.pre_euler);
-dce(*);
-float-collections(*);
-dce(*);
+simpl!(*);
+ip-sroa[true](*);
+sroa[true](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
 
+unforkify(*);
+float-collections(*);
 gcm(*);
-
-- 
GitLab