From ff03fe68238c360455e1240766a77bd3ce494052 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 28 Feb 2025 18:06:10 -0600 Subject: [PATCH 1/6] ip-sroa params first attempt --- hercules_ir/src/typecheck.rs | 8 +- hercules_opt/src/interprocedural_sroa.rs | 123 ++++++++++++++++++----- 2 files changed, 106 insertions(+), 25 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 2a3f9fb1..b2567b8f 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -716,8 +716,10 @@ fn typeflow( // Check number of run-time arguments. if inputs.len() - 1 != callee.param_types.len() { return Error(format!( - "Call node has {} inputs, but calls a function with {} parameters.", + "Call node in {} has {} inputs, but calls a function ({}) with {} parameters.", + function.name, inputs.len() - 1, + callee.name, callee.param_types.len(), )); } @@ -725,8 +727,10 @@ fn typeflow( // Check number of dynamic constant arguments. if dc_args.len() != callee.num_dynamic_constants as usize { return Error(format!( - "Call node references {} dynamic constants, but calls a function expecting {} dynamic constants.", + "Call node in {} references {} dynamic constants, but calls a function ({}) expecting {} dynamic constants.", + function.name, dc_args.len(), + callee.name, callee.num_dynamic_constants )); } diff --git a/hercules_opt/src/interprocedural_sroa.rs b/hercules_opt/src/interprocedural_sroa.rs index ad4ce19e..044d3590 100644 --- a/hercules_opt/src/interprocedural_sroa.rs +++ b/hercules_opt/src/interprocedural_sroa.rs @@ -39,46 +39,91 @@ pub fn interprocedural_sroa( } let editor: &mut FunctionEditor = &mut editors[func_id.idx()]; + let param_types = &editor.func().param_types.to_vec(); let return_types = &editor.func().return_types.to_vec(); - // We determine the new return types of the function and track a map - // that tells us how the old return values are constructed from the - // new ones + // We determine the new param/return types of the function and track a + // map that tells us how the old param/return values are constructed + // from the new ones. + let mut new_param_types = vec![]; + let mut old_param_type_map = vec![]; let mut new_return_types = vec![]; let mut old_return_type_map = vec![]; let mut changed = false; - for ret_typ in return_types.iter() { + for ret_typ in param_types.iter() { if !can_sroa_type(editor, *ret_typ) { + old_param_type_map.push(IndexTree::Leaf(new_param_types.len())); + new_param_types.push(*ret_typ); + } else { + let (types, index) = sroa_type(editor, *ret_typ, new_param_types.len()); + old_param_type_map.push(index); + new_param_types.extend(types); + changed = true; + } + } + + for par_typ in return_types.iter() { + if !can_sroa_type(editor, *par_typ) { old_return_type_map.push(IndexTree::Leaf(new_return_types.len())); - new_return_types.push(*ret_typ); + new_return_types.push(*par_typ); } else { - let (types, index) = sroa_type(editor, *ret_typ, new_return_types.len()); + let (types, index) = sroa_type(editor, *par_typ, new_return_types.len()); old_return_type_map.push(index); new_return_types.extend(types); changed = true; } } - // If the return type is not changed by IP SROA, skip to the next function + // If the param/return types aren't changed by IP SROA, skip to the next + // function. if !changed { continue; } - // Now, modify each return in the current function and the return type - let return_nodes = editor - .func() - .nodes - .iter() - .enumerate() - .filter_map(|(idx, node)| { - if node.try_return().is_some() { - Some(NodeID::new(idx)) + // Modify each parameter in the current function and the param types. + let mut param_nodes: Vec<_> = vec![vec![]; param_types.len()]; + for id in editor.node_ids() { + if let Some(idx) = editor.func().nodes[id.idx()].try_parameter() { + param_nodes[idx].push(id); + } + } + println!("{}", editor.func().name); + let success = editor.edit(|mut edit| { + for (idx, ids) in param_nodes.into_iter().enumerate() { + let new_indices = &old_param_type_map[idx]; + let built = if let IndexTree::Leaf(new_idx) = new_indices { + edit.add_node(Node::Parameter { index: *new_idx }) } else { - None + let prod_ty = param_types[idx]; + let cons = edit.add_zero_constant(prod_ty); + let mut cons = edit.add_node(Node::Constant { id: cons }); + new_indices.for_each(|idx: &Vec<Index>, param_idx: &usize| { + let param = edit.add_node(Node::Parameter { index: *param_idx }); + cons = edit.add_node(Node::Write { + collect: cons, + data: param, + indices: idx.clone().into_boxed_slice(), + }); + }); + cons + }; + for id in ids { + edit = edit.replace_all_uses(id, built)?; + edit = edit.delete_node(id)?; } - }) - .collect::<Vec<_>>(); + } + + edit.set_param_types(new_param_types); + Ok(edit) + }); + assert!(success, "IP SROA expects to be able to edit everything, specify what functions to IP SROA via the func_selection argument"); + + // Modify each return in the current function and the return types. + let return_nodes: Vec<_> = editor + .node_ids() + .filter(|id| editor.func().nodes[id.idx()].is_return()) + .collect(); let success = editor.edit(|mut edit| { for node in return_nodes { let Node::Return { control, data } = edit.get_node(node) else { @@ -114,17 +159,15 @@ pub fn interprocedural_sroa( } edit.set_return_types(new_return_types); - Ok(edit) }); assert!(success, "IP SROA expects to be able to edit everything, specify what functions to IP SROA via the func_selection argument"); - // Finally, update calls of this function - // In particular, we actually don't have to update the call node at all but have to update - // its DataProjection users + // Finally, update calls of this function. for (caller, callsite) in callsites { let editor = &mut editors[caller.idx()]; assert!(editor.func_id() == caller); + let projs = editor.get_users(callsite).collect::<Vec<_>>(); for proj_id in projs { let Node::DataProjection { data: _, selection } = editor.node(proj_id) else { @@ -134,6 +177,40 @@ pub fn interprocedural_sroa( let typ = types[caller.idx()][proj_id.idx()]; replace_returned_value(editor, proj_id, typ, new_return_info, callsite); } + + let (control, callee, dc_args, args) = + editor.func().nodes[callsite.idx()].try_call().unwrap(); + let dc_args = dc_args.clone(); + let args = args.clone(); + let success = editor.edit(|mut edit| { + let mut new_args = vec![]; + for (idx, (data_id, update_info)) in + args.iter().zip(old_param_type_map.iter()).enumerate() + { + if let IndexTree::Leaf(new_idx) = update_info { + // Unchanged parameter value + assert!(new_args.len() == *new_idx); + new_args.push(*data_id); + } else { + // SROA'd parameter value + let reads = generate_reads_edit(&mut edit, param_types[idx], *data_id); + reads.zip(update_info).for_each(|_, (read_id, ret_idx)| { + assert!(new_args.len() == **ret_idx); + new_args.push(*read_id); + }); + } + } + let new_call = edit.add_node(Node::Call { + control, + function: callee, + dynamic_constants: dc_args, + args: new_args.into_boxed_slice(), + }); + edit = edit.replace_all_uses(callsite, new_call)?; + edit = edit.delete_node(callsite)?; + Ok(edit) + }); + assert!(success); } } } -- GitLab From 263b2e0c82483c2d51b76b9fb9ca60e5fd244940 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 28 Feb 2025 19:28:58 -0600 Subject: [PATCH 2/6] whoops --- hercules_opt/src/interprocedural_sroa.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/hercules_opt/src/interprocedural_sroa.rs b/hercules_opt/src/interprocedural_sroa.rs index 044d3590..c7ea5836 100644 --- a/hercules_opt/src/interprocedural_sroa.rs +++ b/hercules_opt/src/interprocedural_sroa.rs @@ -1,6 +1,3 @@ -use std::collections::HashMap; -use std::iter::zip; - use hercules_ir::ir::*; use crate::*; @@ -51,24 +48,24 @@ pub fn interprocedural_sroa( let mut old_return_type_map = vec![]; let mut changed = false; - for ret_typ in param_types.iter() { - if !can_sroa_type(editor, *ret_typ) { + for par_typ in param_types.iter() { + if !can_sroa_type(editor, *par_typ) { old_param_type_map.push(IndexTree::Leaf(new_param_types.len())); - new_param_types.push(*ret_typ); + new_param_types.push(*par_typ); } else { - let (types, index) = sroa_type(editor, *ret_typ, new_param_types.len()); + let (types, index) = sroa_type(editor, *par_typ, new_param_types.len()); old_param_type_map.push(index); new_param_types.extend(types); changed = true; } } - for par_typ in return_types.iter() { - if !can_sroa_type(editor, *par_typ) { + for ret_typ in return_types.iter() { + if !can_sroa_type(editor, *ret_typ) { old_return_type_map.push(IndexTree::Leaf(new_return_types.len())); - new_return_types.push(*par_typ); + new_return_types.push(*ret_typ); } else { - let (types, index) = sroa_type(editor, *par_typ, new_return_types.len()); + let (types, index) = sroa_type(editor, *ret_typ, new_return_types.len()); old_return_type_map.push(index); new_return_types.extend(types); changed = true; -- GitLab From f3ddf1d2e3b74be58efd5a434ad8f7a9f6711b20 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 28 Feb 2025 19:42:08 -0600 Subject: [PATCH 3/6] Handle outputs --- juno_samples/rodinia/cfd/src/cpu_euler.sch | 4 +- .../rodinia/cfd/src/cpu_pre_euler.sch | 4 +- juno_samples/rodinia/cfd/src/lib.rs | 39 ++++++++++++++----- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index 1244f80e..4cf320a6 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -14,8 +14,8 @@ simpl!(*); inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); delete-uncalled(*); simpl!(*); -ip-sroa[false](*); -sroa[false](*); +ip-sroa[true](*); +sroa[true](*); predication(*); const-inline(*); simpl!(*); diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch index 6329c504..14eb6906 100644 --- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch @@ -14,8 +14,8 @@ simpl!(*); inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step); delete-uncalled(*); simpl!(*); -ip-sroa[false](*); -sroa[false](*); +ip-sroa[true](*); +sroa[true](*); predication(*); const-inline(*); simpl!(*); diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs index 39384c0d..e054a605 100644 --- a/juno_samples/rodinia/cfd/src/lib.rs +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -55,8 +55,7 @@ fn run_euler( let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); let mut runner = runner!(euler); - - HerculesMutBox::from(async_std::task::block_on(async { + let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async { runner .run( nelr as u64, @@ -72,9 +71,20 @@ fn run_euler( ff_fc_momentum_z.to(), ) .await - })) - .as_slice() - .to_vec() + }); + + let total = vec![]; + let density = HerculesMutBox::from(density).as_slice(); + let x_m = HerculesMutBox::from(x_m).as_slice(); + let y_m = HerculesMutBox::from(y_m).as_slice(); + let z_m = HerculesMutBox::from(z_m).as_slice(); + let energy = HerculesMutBox::from(energy).as_slice(); + total.extend(density.into_iter().map(|id: &mut f32| *id)); + total.extend(x_m.into_iter().map(|id: &mut f32| *id)); + total.extend(y_m.into_iter().map(|id: &mut f32| *id)); + total.extend(z_m.into_iter().map(|id: &mut f32| *id)); + total.extend(energy.into_iter().map(|id: &mut f32| *id)); + total } fn run_pre_euler( @@ -114,7 +124,7 @@ fn run_pre_euler( let variables = variables.to(); - HerculesMutBox::from(async_std::task::block_on(async { + let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async { runner .run( nelr as u64, @@ -130,9 +140,20 @@ fn run_pre_euler( ff_fc_momentum_z.to(), ) .await - })) - .as_slice() - .to_vec() + }); + + let total = vec![]; + let density = HerculesMutBox::from(density).as_slice(); + let x_m = HerculesMutBox::from(x_m).as_slice(); + let y_m = HerculesMutBox::from(y_m).as_slice(); + let z_m = HerculesMutBox::from(z_m).as_slice(); + let energy = HerculesMutBox::from(energy).as_slice(); + total.extend(density.into_iter().map(|id: &mut f32| *id)); + total.extend(x_m.into_iter().map(|id: &mut f32| *id)); + total.extend(y_m.into_iter().map(|id: &mut f32| *id)); + total.extend(z_m.into_iter().map(|id: &mut f32| *id)); + total.extend(energy.into_iter().map(|id: &mut f32| *id)); + total } fn compare_float(x: f32, y: f32) -> bool { -- GitLab From cdcc0afded55bd30537366c3225e4d786e407c77 Mon Sep 17 00:00:00 2001 From: Aaron Councilman <aaronjc4@illinois.edu> Date: Sat, 1 Mar 2025 13:43:19 -0600 Subject: [PATCH 4/6] Modify cfd to break up input products --- juno_samples/rodinia/cfd/src/lib.rs | 229 +++++++++++++---------- juno_samples/rodinia/cfd/src/rust_cfd.rs | 195 ++++++++++--------- juno_samples/rodinia/cfd/src/setup.rs | 140 ++++++++++---- 3 files changed, 335 insertions(+), 229 deletions(-) diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs index e054a605..62ee59f4 100644 --- a/juno_samples/rodinia/cfd/src/lib.rs +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -24,144 +24,180 @@ pub struct CFDInputs { fn run_euler( nelr: usize, iterations: usize, - mut variables: AlignedSlice<f32>, + mut variables: Variables, areas: &[f32], elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], + normals: &Normals, + ff_variable: &Variable, ff_fc_density_energy: &Float3, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); +) -> Variables { + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let areas = HerculesImmBox::from(areas); let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); let mut runner = runner!(euler); - let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async { + let (density, momentum_x, momentum_y, momentum_z, energy) = + async_std::task::block_on(async { runner .run( nelr as u64, iterations as u64, - variables.to(), + v_density.to(), + v_momentum_x.to(), + v_momentum_y.to(), + v_momentum_z.to(), + v_energy.to(), areas.to(), elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), + normals_x.to(), + normals_y.to(), + normals_z.to(), + ff_variable.density, + ff_variable.momentum.x, + ff_variable.momentum.y, + ff_variable.momentum.z, + ff_variable.energy, + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ff_fc_momentum_x.x, + ff_fc_momentum_x.y, + ff_fc_momentum_x.z, + ff_fc_momentum_y.x, + ff_fc_momentum_y.y, + ff_fc_momentum_y.z, + ff_fc_momentum_z.x, + ff_fc_momentum_z.y, + ff_fc_momentum_z.z, ) .await }); - let total = vec![]; - let density = HerculesMutBox::from(density).as_slice(); - let x_m = HerculesMutBox::from(x_m).as_slice(); - let y_m = HerculesMutBox::from(y_m).as_slice(); - let z_m = HerculesMutBox::from(z_m).as_slice(); - let energy = HerculesMutBox::from(energy).as_slice(); - total.extend(density.into_iter().map(|id: &mut f32| *id)); - total.extend(x_m.into_iter().map(|id: &mut f32| *id)); - total.extend(y_m.into_iter().map(|id: &mut f32| *id)); - total.extend(z_m.into_iter().map(|id: &mut f32| *id)); - total.extend(energy.into_iter().map(|id: &mut f32| *id)); - total + Variables { + density: AlignedSlice::from_slice(HerculesMutBox::from(density).as_slice()), + momentum: Momentum { + x: AlignedSlice::from_slice(HerculesMutBox::from(momentum_x).as_slice()), + y: AlignedSlice::from_slice(HerculesMutBox::from(momentum_y).as_slice()), + z: AlignedSlice::from_slice(HerculesMutBox::from(momentum_z).as_slice()), + }, + energy: AlignedSlice::from_slice(HerculesMutBox::from(energy).as_slice()), + } } fn run_pre_euler( nelr: usize, iterations: usize, - mut variables: AlignedSlice<f32>, + mut variables: Variables, areas: &[f32], elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], + normals: &Normals, + ff_variable: &Variable, ff_fc_density_energy: &Float3, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); +) -> Variables { + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let areas = HerculesImmBox::from(areas); let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); let mut runner = runner!(pre_euler); - - let variables = variables.to(); - - let (density, x_m, y_m, z_m, energy) = async_std::task::block_on(async { + let (density, momentum_x, momentum_y, momentum_z, energy) = + async_std::task::block_on(async { runner .run( nelr as u64, iterations as u64, - variables, + v_density.to(), + v_momentum_x.to(), + v_momentum_y.to(), + v_momentum_z.to(), + v_energy.to(), areas.to(), elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), + normals_x.to(), + normals_y.to(), + normals_z.to(), + ff_variable.density, + ff_variable.momentum.x, + ff_variable.momentum.y, + ff_variable.momentum.z, + ff_variable.energy, + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ff_fc_momentum_x.x, + ff_fc_momentum_x.y, + ff_fc_momentum_x.z, + ff_fc_momentum_y.x, + ff_fc_momentum_y.y, + ff_fc_momentum_y.z, + ff_fc_momentum_z.x, + ff_fc_momentum_z.y, + ff_fc_momentum_z.z, ) .await }); - let total = vec![]; - let density = HerculesMutBox::from(density).as_slice(); - let x_m = HerculesMutBox::from(x_m).as_slice(); - let y_m = HerculesMutBox::from(y_m).as_slice(); - let z_m = HerculesMutBox::from(z_m).as_slice(); - let energy = HerculesMutBox::from(energy).as_slice(); - total.extend(density.into_iter().map(|id: &mut f32| *id)); - total.extend(x_m.into_iter().map(|id: &mut f32| *id)); - total.extend(y_m.into_iter().map(|id: &mut f32| *id)); - total.extend(z_m.into_iter().map(|id: &mut f32| *id)); - total.extend(energy.into_iter().map(|id: &mut f32| *id)); - total + Variables { + density: AlignedSlice::from_slice(HerculesMutBox::from(density).as_slice()), + momentum: Momentum { + x: AlignedSlice::from_slice(HerculesMutBox::from(momentum_x).as_slice()), + y: AlignedSlice::from_slice(HerculesMutBox::from(momentum_y).as_slice()), + z: AlignedSlice::from_slice(HerculesMutBox::from(momentum_z).as_slice()), + }, + energy: AlignedSlice::from_slice(HerculesMutBox::from(energy).as_slice()), + } } fn compare_float(x: f32, y: f32) -> bool { (x - y).abs() < 1e-5 } -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +fn compare_floats(xs: &Variables, ys: &Variables) -> bool { + let xs_density = xs.density.as_slice(); + let xs_momentum_x = xs.momentum.x.as_slice(); + let xs_momentum_y = xs.momentum.y.as_slice(); + let xs_momentum_z = xs.momentum.z.as_slice(); + let xs_energy = xs.energy.as_slice(); + + let ys_density = ys.density.as_slice(); + let ys_momentum_x = ys.momentum.x.as_slice(); + let ys_momentum_y = ys.momentum.y.as_slice(); + let ys_momentum_z = ys.momentum.z.as_slice(); + let ys_energy = ys.energy.as_slice(); + + xs_density.len() == ys_density.len() + && xs_density.iter().zip(ys_density.iter()).all(|(x, y)| compare_float(*x, *y)) + && xs_momentum_x.len() == ys_momentum_x.len() + && xs_momentum_x.iter().zip(ys_momentum_x.iter()).all(|(x, y)| compare_float(*x, *y)) + && xs_momentum_y.len() == ys_momentum_y.len() + && xs_momentum_y.iter().zip(ys_momentum_y.iter()).all(|(x, y)| compare_float(*x, *y)) + && xs_momentum_z.len() == ys_momentum_z.len() + && xs_momentum_z.iter().zip(ys_momentum_z.iter()).all(|(x, y)| compare_float(*x, *y)) + && xs_energy.len() == ys_energy.len() + && xs_energy.iter().zip(ys_energy.iter()).all(|(x, y)| compare_float(*x, *y)) } pub fn cfd_harness(args: CFDInputs) { @@ -172,8 +208,6 @@ pub fn cfd_harness(args: CFDInputs) { pre_euler, } = args; - assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); - let FarFieldConditions { ff_variable, ff_fc_momentum_x, @@ -189,7 +223,7 @@ pub fn cfd_harness(args: CFDInputs) { normals, } = read_domain_geometry(data_file, block_size); - let variables = initialize_variables(nelr, ff_variable.as_slice()); + let variables = initialize_variables(nelr, &ff_variable); let res_juno = if pre_euler { run_pre_euler( @@ -198,8 +232,8 @@ pub fn cfd_harness(args: CFDInputs) { variables.clone(), areas.as_slice(), elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), + &normals, + &ff_variable, &ff_fc_density_energy, &ff_fc_momentum_x, &ff_fc_momentum_y, @@ -212,8 +246,8 @@ pub fn cfd_harness(args: CFDInputs) { variables.clone(), areas.as_slice(), elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), + &normals, + &ff_variable, &ff_fc_density_energy, &ff_fc_momentum_x, &ff_fc_momentum_y, @@ -227,8 +261,8 @@ pub fn cfd_harness(args: CFDInputs) { variables, areas.as_slice(), elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), + &normals, + &ff_variable, &ff_fc_density_energy, &ff_fc_momentum_x, &ff_fc_momentum_y, @@ -241,8 +275,8 @@ pub fn cfd_harness(args: CFDInputs) { variables, areas.as_slice(), elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), + &normals, + &ff_variable, &ff_fc_density_energy, &ff_fc_momentum_x, &ff_fc_momentum_y, @@ -250,8 +284,7 @@ pub fn cfd_harness(args: CFDInputs) { ) }; - if !compare_floats(&res_juno, res_rust.as_slice()) { - assert_eq!(res_juno.len(), res_rust.as_slice().len()); + if !compare_floats(&res_juno, &res_rust) { panic!("Mismatch in results"); } } diff --git a/juno_samples/rodinia/cfd/src/rust_cfd.rs b/juno_samples/rodinia/cfd/src/rust_cfd.rs index 42479df8..6d92c2b5 100644 --- a/juno_samples/rodinia/cfd/src/rust_cfd.rs +++ b/juno_samples/rodinia/cfd/src/rust_cfd.rs @@ -58,19 +58,19 @@ fn compute_speed_of_sound(density: f32, pressure: f32) -> f32 { (GAMMA * pressure / density).sqrt() } -fn compute_step_factor(nelr: usize, variables: &[f32], areas: &[f32]) -> Vec<f32> { +fn compute_step_factor(nelr: usize, variables: &Variables, areas: &[f32]) -> Vec<f32> { let mut step_factors = vec![0.0; nelr]; for i in 0..nelr { - let density = variables[i + VAR_DENSITY * nelr]; + let density = variables.density[i]; let momentum = Float3 { - x: variables[i + (VAR_MOMENTUM + 0) * nelr], - y: variables[i + (VAR_MOMENTUM + 1) * nelr], - z: variables[i + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[i], + y: variables.momentum.y[i], + z: variables.momentum.z[i], }; - let density_energy = variables[i + VAR_DENSITY_ENERGY * nelr]; + let density_energy = variables.energy[i]; let velocity = compute_velocity(density, &momentum); let speed_sqd = compute_speed_sqd(&velocity); @@ -85,28 +85,36 @@ fn compute_step_factor(nelr: usize, variables: &[f32], areas: &[f32]) -> Vec<f32 fn compute_flux_euler( nelr: usize, - variables: &[f32], + variables: &Variables, elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], + normals: &Normals, + ff_variable: &Variable, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, ff_fc_density_energy: &Float3, -) -> Vec<f32> { +) -> Variables { let smoothing_coefficient: f32 = 0.2; - let mut fluxes = vec![0.0; nelr * NVAR]; + let mut fluxes = Variables { + density: AlignedSlice::of_len(nelr), + momentum: Momentum { + x: AlignedSlice::of_len(nelr), + y: AlignedSlice::of_len(nelr), + z: AlignedSlice::of_len(nelr), + }, + energy: AlignedSlice::of_len(nelr), + }; for i in 0..nelr { - let density_i = variables[i + VAR_DENSITY * nelr]; + let density_i = variables.density[i]; let momentum_i = Float3 { - x: variables[i + (VAR_MOMENTUM + 0) * nelr], - y: variables[i + (VAR_MOMENTUM + 1) * nelr], - z: variables[i + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[i], + y: variables.momentum.y[i], + z: variables.momentum.z[i], }; - let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr]; + let density_energy_i = variables.energy[i]; let velocity_i = compute_velocity(density_i, &momentum_i); let speed_sqd_i = compute_speed_sqd(&velocity_i); @@ -134,9 +142,9 @@ fn compute_flux_euler( for j in 0..NNB { let nb = elements_surrounding_elements[i + j * nelr]; let normal = Float3 { - x: normals[i + (j + 0 * NNB) * nelr], - y: normals[i + (j + 1 * NNB) * nelr], - z: normals[i + (j + 2 * NNB) * nelr], + x: normals.x[i + j * nelr], + y: normals.y[i + j * nelr], + z: normals.z[i + j * nelr], }; let normal_len = (normal.x * normal.x + normal.y * normal.y + normal.z * normal.z).sqrt(); @@ -144,13 +152,13 @@ fn compute_flux_euler( if nb >= 0 { // legitimate neighbor let nb = nb as usize; - let density_nb = variables[nb + VAR_DENSITY * nelr]; + let density_nb = variables.density[nb]; let momentum_nb = Float3 { - x: variables[nb + (VAR_MOMENTUM + 0) * nelr], - y: variables[nb + (VAR_MOMENTUM + 1) * nelr], - z: variables[nb + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[nb], + y: variables.momentum.y[nb], + z: variables.momentum.z[nb], }; - let density_energy_nb = variables[nb + VAR_DENSITY_ENERGY * nelr]; + let density_energy_nb = variables.energy[nb]; let velocity_nb = compute_velocity(density_nb, &momentum_nb); let speed_sqd_nb = compute_speed_sqd(&velocity_nb); let pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); @@ -205,21 +213,21 @@ fn compute_flux_euler( } else if nb == -2 { // a far field boundary let factor = 0.5 * normal.x; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 0] + momentum_i.x); + flux_i_density += factor * (ff_variable.momentum.x + momentum_i.x); flux_i_density_energy += factor * (ff_fc_density_energy.x + fc_i_density_energy.x); flux_i_momentum.x += factor * (ff_fc_momentum_x.x + fc_i_momentum_x.x); flux_i_momentum.y += factor * (ff_fc_momentum_y.x + fc_i_momentum_y.x); flux_i_momentum.z += factor * (ff_fc_momentum_z.x + fc_i_momentum_z.x); let factor = 0.5 * normal.y; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 1] + momentum_i.y); + flux_i_density += factor * (ff_variable.momentum.y + momentum_i.y); flux_i_density_energy += factor * (ff_fc_density_energy.y + fc_i_density_energy.y); flux_i_momentum.x += factor * (ff_fc_momentum_x.y + fc_i_momentum_x.y); flux_i_momentum.y += factor * (ff_fc_momentum_y.y + fc_i_momentum_y.y); flux_i_momentum.z += factor * (ff_fc_momentum_z.y + fc_i_momentum_z.y); let factor = 0.5 * normal.z; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 2] + momentum_i.z); + flux_i_density += factor * (ff_variable.momentum.z + momentum_i.z); flux_i_density_energy += factor * (ff_fc_density_energy.z + fc_i_density_energy.z); flux_i_momentum.x += factor * (ff_fc_momentum_x.z + fc_i_momentum_x.z); flux_i_momentum.y += factor * (ff_fc_momentum_y.z + fc_i_momentum_y.z); @@ -227,11 +235,11 @@ fn compute_flux_euler( } } - fluxes[i + VAR_DENSITY * nelr] = flux_i_density; - fluxes[i + (VAR_MOMENTUM + 0) * nelr] = flux_i_momentum.x; - fluxes[i + (VAR_MOMENTUM + 1) * nelr] = flux_i_momentum.y; - fluxes[i + (VAR_MOMENTUM + 2) * nelr] = flux_i_momentum.z; - fluxes[i + VAR_DENSITY_ENERGY * nelr] = flux_i_density_energy; + fluxes.density[i] = flux_i_density; + fluxes.momentum.x[i] = flux_i_momentum.x; + fluxes.momentum.y[i] = flux_i_momentum.y; + fluxes.momentum.z[i] = flux_i_momentum.z; + fluxes.energy[i] = flux_i_density_energy; } fluxes @@ -239,7 +247,7 @@ fn compute_flux_euler( fn compute_flux_contributions( nelr: usize, - variables: &[f32], + variables: &Variables, ) -> (Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) { let mut fc_momentum_x = vec![0.0; nelr * NDIM]; let mut fc_momentum_y = vec![0.0; nelr * NDIM]; @@ -247,14 +255,14 @@ fn compute_flux_contributions( let mut fc_density_energy = vec![0.0; nelr * NDIM]; for i in 0..nelr { - let density_i = variables[i + VAR_DENSITY * nelr]; + let density_i = variables.density[i]; let momentum_i = Float3 { - x: variables[i + (VAR_MOMENTUM + 0) * nelr], - y: variables[i + (VAR_MOMENTUM + 1) * nelr], - z: variables[i + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[i], + y: variables.momentum.y[i], + z: variables.momentum.z[i], }; - let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr]; + let density_energy_i = variables.energy[i]; let velocity_i = compute_velocity(density_i, &momentum_i); let speed_sqd_i = compute_speed_sqd(&velocity_i); @@ -298,32 +306,40 @@ fn compute_flux_contributions( fn compute_flux_pre_euler( nelr: usize, - variables: &[f32], + variables: &Variables, elements_surrounding_elements: &[i32], - normals: &[f32], + normals: &Normals, fc_momentum_x: &[f32], fc_momentum_y: &[f32], fc_momentum_z: &[f32], fc_density_energy: &[f32], - ff_variable: &[f32], + ff_variable: &Variable, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, ff_fc_density_energy: &Float3, -) -> Vec<f32> { +) -> Variables { let smoothing_coefficient: f32 = 0.2; - let mut fluxes = vec![0.0; nelr * NVAR]; + let mut fluxes = Variables { + density: AlignedSlice::of_len(nelr), + momentum: Momentum { + x: AlignedSlice::of_len(nelr), + y: AlignedSlice::of_len(nelr), + z: AlignedSlice::of_len(nelr), + }, + energy: AlignedSlice::of_len(nelr), + }; for i in 0..nelr { - let density_i = variables[i + VAR_DENSITY * nelr]; + let density_i = variables.density[i]; let momentum_i = Float3 { - x: variables[i + (VAR_MOMENTUM + 0) * nelr], - y: variables[i + (VAR_MOMENTUM + 1) * nelr], - z: variables[i + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[i], + y: variables.momentum.y[i], + z: variables.momentum.z[i], }; - let density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr]; + let density_energy_i = variables.energy[i]; let velocity_i = compute_velocity(density_i, &momentum_i); let speed_sqd_i = compute_speed_sqd(&velocity_i); @@ -363,22 +379,22 @@ fn compute_flux_pre_euler( for j in 0..NNB { let nb = elements_surrounding_elements[i + j * nelr]; let normal = Float3 { - x: normals[i + (j + 0 * NNB) * nelr], - y: normals[i + (j + 1 * NNB) * nelr], - z: normals[i + (j + 2 * NNB) * nelr], + x: normals.x[i + j * nelr], + y: normals.y[i + j * nelr], + z: normals.z[i + j * nelr], }; let normal_len = (normal.x * normal.x + normal.y * normal.y + normal.z * normal.z).sqrt(); if nb >= 0 { let nb = nb as usize; - let density_nb = variables[nb + VAR_DENSITY * nelr]; + let density_nb = variables.density[nb]; let momentum_nb = Float3 { - x: variables[nb + (VAR_MOMENTUM + 0) * nelr], - y: variables[nb + (VAR_MOMENTUM + 1) * nelr], - z: variables[nb + (VAR_MOMENTUM + 2) * nelr], + x: variables.momentum.x[nb], + y: variables.momentum.y[nb], + z: variables.momentum.z[nb], }; - let density_energy_nb = variables[nb + VAR_DENSITY_ENERGY * nelr]; + let density_energy_nb = variables.energy[nb]; let velocity_nb = compute_velocity(density_nb, &momentum_nb); let speed_sqd_nb = compute_speed_sqd(&velocity_nb); let pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); @@ -443,21 +459,21 @@ fn compute_flux_pre_euler( flux_i_momentum.z += normal.z * pressure_i; } else if nb == -2 { let factor = 0.5 * normal.x; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 0] + momentum_i.x); + flux_i_density += factor * (ff_variable.momentum.x + momentum_i.x); flux_i_density_energy += factor * (ff_fc_density_energy.x + fc_i_density_energy.x); flux_i_momentum.x += factor * (ff_fc_momentum_x.x + fc_i_momentum_x.x); flux_i_momentum.y += factor * (ff_fc_momentum_y.x + fc_i_momentum_y.x); flux_i_momentum.z += factor * (ff_fc_momentum_z.x + fc_i_momentum_z.x); let factor = 0.5 * normal.y; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 1] + momentum_i.y); + flux_i_density += factor * (ff_variable.momentum.y + momentum_i.y); flux_i_density_energy += factor * (ff_fc_density_energy.y + fc_i_density_energy.y); flux_i_momentum.x += factor * (ff_fc_momentum_x.y + fc_i_momentum_x.y); flux_i_momentum.y += factor * (ff_fc_momentum_y.y + fc_i_momentum_y.y); flux_i_momentum.z += factor * (ff_fc_momentum_z.y + fc_i_momentum_z.y); let factor = 0.5 * normal.z; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 2] + momentum_i.z); + flux_i_density += factor * (ff_variable.momentum.z + momentum_i.z); flux_i_density_energy += factor * (ff_fc_density_energy.z + fc_i_density_energy.z); flux_i_momentum.x += factor * (ff_fc_momentum_x.z + fc_i_momentum_x.z); flux_i_momentum.y += factor * (ff_fc_momentum_y.z + fc_i_momentum_y.z); @@ -465,11 +481,11 @@ fn compute_flux_pre_euler( } } - fluxes[i + VAR_DENSITY * nelr] = flux_i_density; - fluxes[i + (VAR_MOMENTUM + 0) * nelr] = flux_i_momentum.x; - fluxes[i + (VAR_MOMENTUM + 1) * nelr] = flux_i_momentum.y; - fluxes[i + (VAR_MOMENTUM + 2) * nelr] = flux_i_momentum.z; - fluxes[i + VAR_DENSITY_ENERGY * nelr] = flux_i_density_energy; + fluxes.density[i] = flux_i_density; + fluxes.momentum.x[i] = flux_i_momentum.x; + fluxes.momentum.y[i] = flux_i_momentum.y; + fluxes.momentum.z[i] = flux_i_momentum.z; + fluxes.energy[i] = flux_i_density_energy; } fluxes @@ -478,47 +494,42 @@ fn compute_flux_pre_euler( fn time_step( j: usize, nelr: usize, - old_variables: &[f32], - variables: &mut AlignedSlice<f32>, + old_variables: &Variables, + variables: &mut Variables, step_factors: &[f32], - fluxes: &[f32], + fluxes: &Variables, ) { for i in 0..nelr { let factor = step_factors[i] / (RK + 1 - j) as f32; - variables[i + VAR_DENSITY * nelr] = - old_variables[i + VAR_DENSITY * nelr] + factor * fluxes[i + VAR_DENSITY * nelr]; - variables[i + (VAR_MOMENTUM + 0) * nelr] = old_variables[i + (VAR_MOMENTUM + 0) * nelr] - + factor * fluxes[i + (VAR_MOMENTUM + 0) * nelr]; - variables[i + (VAR_MOMENTUM + 1) * nelr] = old_variables[i + (VAR_MOMENTUM + 1) * nelr] - + factor * fluxes[i + (VAR_MOMENTUM + 1) * nelr]; - variables[i + (VAR_MOMENTUM + 2) * nelr] = old_variables[i + (VAR_MOMENTUM + 2) * nelr] - + factor * fluxes[i + (VAR_MOMENTUM + 2) * nelr]; - variables[i + VAR_DENSITY_ENERGY * nelr] = old_variables[i + VAR_DENSITY_ENERGY * nelr] - + factor * fluxes[i + VAR_DENSITY_ENERGY * nelr]; + variables.density[i] = old_variables.density[i] + factor * fluxes.density[i]; + variables.momentum.x[i] = old_variables.momentum.x[i] + factor * fluxes.momentum.x[i]; + variables.momentum.y[i] = old_variables.momentum.y[i] + factor * fluxes.momentum.y[i]; + variables.momentum.z[i] = old_variables.momentum.z[i] + factor * fluxes.momentum.z[i]; + variables.energy[i] = old_variables.energy[i] + factor * fluxes.energy[i]; } } pub fn euler( nelr: usize, iterations: usize, - mut variables: AlignedSlice<f32>, + mut variables: Variables, areas: &[f32], elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], + normals: &Normals, + ff_variable: &Variable, ff_fc_density_energy: &Float3, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, -) -> AlignedSlice<f32> { +) -> Variables { for i in 0..iterations { let old_variables = variables.clone(); - let step_factors = compute_step_factor(nelr, variables.as_slice(), areas); + let step_factors = compute_step_factor(nelr, &variables, areas); for j in 0..RK { let fluxes = compute_flux_euler( nelr, - variables.as_slice(), + &variables, elements_surrounding_elements, normals, ff_variable, @@ -530,7 +541,7 @@ pub fn euler( time_step( j, nelr, - old_variables.as_slice(), + &old_variables, &mut variables, &step_factors, &fluxes, @@ -544,26 +555,26 @@ pub fn euler( pub fn pre_euler( nelr: usize, iterations: usize, - mut variables: AlignedSlice<f32>, + mut variables: Variables, areas: &[f32], elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], + normals: &Normals, + ff_variable: &Variable, ff_fc_density_energy: &Float3, ff_fc_momentum_x: &Float3, ff_fc_momentum_y: &Float3, ff_fc_momentum_z: &Float3, -) -> AlignedSlice<f32> { +) -> Variables { for i in 0..iterations { let old_variables = variables.clone(); - let step_factors = compute_step_factor(nelr, variables.as_slice(), areas); + let step_factors = compute_step_factor(nelr, &variables, areas); for j in 0..RK { let (fc_momentum_x, fc_momentum_y, fc_momentum_z, fc_density_energy) = - compute_flux_contributions(nelr, variables.as_slice()); + compute_flux_contributions(nelr, &variables); let fluxes = compute_flux_pre_euler( nelr, - variables.as_slice(), + &variables, elements_surrounding_elements, normals, &fc_momentum_x, @@ -579,7 +590,7 @@ pub fn pre_euler( time_step( j, nelr, - old_variables.as_slice(), + &old_variables, &mut variables, &step_factors, &fluxes, diff --git a/juno_samples/rodinia/cfd/src/setup.rs b/juno_samples/rodinia/cfd/src/setup.rs index b996f057..0da00251 100644 --- a/juno_samples/rodinia/cfd/src/setup.rs +++ b/juno_samples/rodinia/cfd/src/setup.rs @@ -1,10 +1,10 @@ +use std::fmt::{Debug, Error, Formatter}; use std::fs::File; use std::io::Read; +use std::ops::{Index, IndexMut}; use std::path::Path; use std::str::FromStr; -use std::ops::{Index, IndexMut}; - use nom::Parser; use crate::rust_cfd; @@ -14,10 +14,6 @@ pub const ff_mach: f32 = 1.2; pub const NDIM: usize = 3; pub const NNB: usize = 4; pub const RK: usize = 3; -pub const VAR_DENSITY: usize = 0; -pub const VAR_MOMENTUM: usize = 1; -pub const VAR_DENSITY_ENERGY: usize = VAR_MOMENTUM + NDIM; -pub const NVAR: usize = VAR_DENSITY_ENERGY + 1; pub const deg_angle_of_attack: f32 = 0.0; #[repr(align(64))] @@ -31,6 +27,15 @@ pub struct AlignedSlice<T> { length: usize, } +impl<T> Debug for AlignedSlice<T> +where + T: Debug +{ + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:?}", self.as_slice()) + } +} + impl<T> AlignedSlice<T> where T: Default, @@ -53,6 +58,17 @@ where } } +impl<T> AlignedSlice<T> +where + T: Clone + Default, +{ + pub fn from_slice(slice: &[T]) -> Self { + let mut res = Self::of_len(slice.len()); + res.as_mut_slice().clone_from_slice(slice); + res + } +} + impl<T> AlignedSlice<T> { pub fn as_slice(&self) -> &[T] { &self.slice[self.offset..self.offset + self.length] @@ -90,15 +106,20 @@ where } } -#[repr(C)] pub struct Float3 { pub x: f32, pub y: f32, pub z: f32, } +pub struct Variable { + pub density: f32, + pub momentum: Float3, + pub energy: f32, +} + pub struct FarFieldConditions { - pub ff_variable: AlignedSlice<f32>, + pub ff_variable: Variable, pub ff_fc_momentum_x: Float3, pub ff_fc_momentum_y: Float3, pub ff_fc_momentum_z: Float3, @@ -106,14 +127,14 @@ pub struct FarFieldConditions { } pub fn set_far_field_conditions() -> FarFieldConditions { - let mut ff_variable: AlignedSlice<f32> = AlignedSlice::of_len(NVAR); + let mut ff_variable = Variable { density: 0.0, momentum: Float3 { x: 0.0, y: 0.0, z: 0.0 }, energy: 0.0 }; let angle_of_attack = std::f32::consts::PI / 180.0 * deg_angle_of_attack; - ff_variable[VAR_DENSITY] = 1.4; + ff_variable.density = 1.4; let ff_pressure = 1.0; - let ff_speed_of_sound = (GAMMA * ff_pressure / ff_variable[VAR_DENSITY]).sqrt(); + let ff_speed_of_sound = (GAMMA * ff_pressure / ff_variable.density).sqrt(); let ff_speed = ff_mach * ff_speed_of_sound; let ff_velocity = Float3 { @@ -122,23 +143,23 @@ pub fn set_far_field_conditions() -> FarFieldConditions { z: 0.0, }; - ff_variable[VAR_MOMENTUM + 0] = ff_variable[VAR_DENSITY] * ff_velocity.x; - ff_variable[VAR_MOMENTUM + 1] = ff_variable[VAR_DENSITY] * ff_velocity.y; - ff_variable[VAR_MOMENTUM + 2] = ff_variable[VAR_DENSITY] * ff_velocity.z; + ff_variable.momentum.x = ff_variable.density * ff_velocity.x; + ff_variable.momentum.y = ff_variable.density * ff_velocity.y; + ff_variable.momentum.z = ff_variable.density * ff_velocity.z; - ff_variable[VAR_DENSITY_ENERGY] = - ff_variable[VAR_DENSITY] * (0.5 * (ff_speed * ff_speed)) + (ff_pressure / (GAMMA - 1.0)); + ff_variable.energy = + ff_variable.density * (0.5 * (ff_speed * ff_speed)) + (ff_pressure / (GAMMA - 1.0)); let ff_momentum = Float3 { - x: ff_variable[VAR_MOMENTUM + 0], - y: ff_variable[VAR_MOMENTUM + 1], - z: ff_variable[VAR_MOMENTUM + 2], + x: ff_variable.momentum.x, + y: ff_variable.momentum.y, + z: ff_variable.momentum.z, }; let (ff_fc_momentum_x, ff_fc_momentum_y, ff_fc_momentum_z, ff_fc_density_energy) = rust_cfd::compute_flux_contribution( - ff_variable[VAR_DENSITY], + ff_variable.density, &ff_momentum, - ff_variable[VAR_DENSITY_ENERGY], + ff_variable.energy, ff_pressure, &ff_velocity, ); @@ -152,11 +173,17 @@ pub fn set_far_field_conditions() -> FarFieldConditions { } } +pub struct Normals { + pub x: AlignedSlice<f32>, + pub y: AlignedSlice<f32>, + pub z: AlignedSlice<f32>, +} + pub struct GeometryData { pub nelr: usize, pub areas: AlignedSlice<f32>, pub elements_surrounding_elements: AlignedSlice<i32>, - pub normals: AlignedSlice<f32>, + pub normals: Normals, } pub fn read_domain_geometry<P>(path: P, block_size: usize) -> GeometryData @@ -183,7 +210,11 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo let mut areas: AlignedSlice<f32> = AlignedSlice::of_len(nelr); let mut elements_surrounding_elements: AlignedSlice<i32> = AlignedSlice::of_len(nelr * NNB); - let mut normals: AlignedSlice<f32> = AlignedSlice::of_len(NDIM * NNB * nelr); + let mut normals = Normals { + x: AlignedSlice::of_len(NNB * nelr), + y: AlignedSlice::of_len(NNB * nelr), + z: AlignedSlice::of_len(NNB * nelr), + }; // read in data let mut text = text; @@ -209,14 +240,21 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo let val = i32::from_str(val).unwrap(); let val = if neg { -val } else { val }; elements_surrounding_elements[i + j * nelr] = if val < 0 { -1 } else { val } - 1; // it's coming in with Fortran numbering - - for k in 0..NDIM { - let t = nom::character::complete::multispace0(text)?.0; - let (t, val) = nom::number::complete::float(t)?; - text = t; - - normals[i + (j + k * NNB) * nelr] = -val; - } + + let t = nom::character::complete::multispace0(text)?.0; + let (t, val) = nom::number::complete::float(t)?; + text = t; + normals.x[i + j * nelr] = -val; + + let t = nom::character::complete::multispace0(text)?.0; + let (t, val) = nom::number::complete::float(t)?; + text = t; + normals.y[i + j * nelr] = -val; + + let t = nom::character::complete::multispace0(text)?.0; + let (t, val) = nom::number::complete::float(t)?; + text = t; + normals.z[i + j * nelr] = -val; } } @@ -227,9 +265,9 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo for j in 0..NNB { elements_surrounding_elements[i + j * nelr] = elements_surrounding_elements[last + j * nelr]; - for k in 0..NDIM { - normals[i + (j + k * NNB) * nelr] = normals[last + (j + k * NNB) * nelr]; - } + normals.x[i + j * nelr] = normals.x[last + j * nelr]; + normals.y[i + j * nelr] = normals.y[last + j * nelr]; + normals.z[i + j * nelr] = normals.z[last + j * nelr]; } } @@ -245,13 +283,37 @@ fn cfd_parser<'a>(block_size: usize, text: &'a str) -> nom::IResult<&'a str, Geo )) } -pub fn initialize_variables(nelr: usize, ff_variable: &[f32]) -> AlignedSlice<f32> { - let mut variables = AlignedSlice::of_len(nelr * NVAR); +#[derive(Clone)] +pub struct Momentum { + pub x: AlignedSlice<f32>, + pub y: AlignedSlice<f32>, + pub z: AlignedSlice<f32>, +} + +#[derive(Clone)] +pub struct Variables { + pub density: AlignedSlice<f32>, + pub momentum: Momentum, + pub energy: AlignedSlice<f32>, +} + +pub fn initialize_variables(nelr: usize, ff_variable: &Variable) -> Variables { + let mut variables = Variables { + density: AlignedSlice::of_len(nelr), + momentum: Momentum { + x: AlignedSlice::of_len(nelr), + y: AlignedSlice::of_len(nelr), + z: AlignedSlice::of_len(nelr), + }, + energy: AlignedSlice::of_len(nelr), + }; for i in 0..nelr { - for j in 0..NVAR { - variables[i + j * nelr] = ff_variable[j]; - } + variables.density[i] = ff_variable.density; + variables.momentum.x[i] = ff_variable.momentum.x; + variables.momentum.y[i] = ff_variable.momentum.y; + variables.momentum.z[i] = ff_variable.momentum.z; + variables.energy[i] = ff_variable.energy; } variables -- GitLab From 1dd164a4f31255db25830a49d1d047901963fe4c Mon Sep 17 00:00:00 2001 From: Aaron Councilman <aaronjc4@illinois.edu> Date: Sat, 1 Mar 2025 13:47:58 -0600 Subject: [PATCH 5/6] Fix cfd bench --- juno_samples/rodinia/cfd/benches/cfd_bench.rs | 122 +++++++++++------- 1 file changed, 76 insertions(+), 46 deletions(-) diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs index fd614b42..5fc73db9 100644 --- a/juno_samples/rodinia/cfd/benches/cfd_bench.rs +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -28,40 +28,55 @@ fn cfd_bench(c: &mut Criterion) { elements_surrounding_elements, normals, } = read_domain_geometry(data_file, block_size); - let mut variables = initialize_variables(nelr, ff_variable.as_slice()); - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let mut variables = initialize_variables(nelr, &ff_variable); + + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements.as_slice()); - let normals = HerculesImmBox::from(normals.as_slice()); - let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); + group.bench_function("cfd bench euler", |b| { b.iter(|| { async_std::task::block_on(async { r.run( nelr as u64, iterations as u64, - variables.to(), + v_density.to(), + v_momentum_x.to(), + v_momentum_y.to(), + v_momentum_z.to(), + v_energy.to(), areas.to(), elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), + normals_x.to(), + normals_y.to(), + normals_z.to(), + ff_variable.density, + ff_variable.momentum.x, + ff_variable.momentum.y, + ff_variable.momentum.z, + ff_variable.energy, + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ff_fc_momentum_x.x, + ff_fc_momentum_x.y, + ff_fc_momentum_x.z, + ff_fc_momentum_y.x, + ff_fc_momentum_y.y, + ff_fc_momentum_y.z, + ff_fc_momentum_z.x, + ff_fc_momentum_z.y, + ff_fc_momentum_z.z, ) .await }); @@ -85,40 +100,55 @@ fn cfd_bench(c: &mut Criterion) { elements_surrounding_elements, normals, } = read_domain_geometry(data_file, block_size); - let mut variables = initialize_variables(nelr, ff_variable.as_slice()); - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let mut variables = initialize_variables(nelr, &ff_variable); + + let mut v_density = HerculesMutBox::from(variables.density.as_mut_slice()); + let mut v_momentum_x = HerculesMutBox::from(variables.momentum.x.as_mut_slice()); + let mut v_momentum_y = HerculesMutBox::from(variables.momentum.y.as_mut_slice()); + let mut v_momentum_z = HerculesMutBox::from(variables.momentum.z.as_mut_slice()); + let mut v_energy = HerculesMutBox::from(variables.energy.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements.as_slice()); - let normals = HerculesImmBox::from(normals.as_slice()); - let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let normals_x = HerculesImmBox::from(normals.x.as_slice()); + let normals_y = HerculesImmBox::from(normals.y.as_slice()); + let normals_z = HerculesImmBox::from(normals.z.as_slice()); + group.bench_function("cfd bench pre-euler", |b| { b.iter(|| { async_std::task::block_on(async { r.run( nelr as u64, iterations as u64, - variables.to(), + v_density.to(), + v_momentum_x.to(), + v_momentum_y.to(), + v_momentum_z.to(), + v_energy.to(), areas.to(), elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), + normals_x.to(), + normals_y.to(), + normals_z.to(), + ff_variable.density, + ff_variable.momentum.x, + ff_variable.momentum.y, + ff_variable.momentum.z, + ff_variable.energy, + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ff_fc_momentum_x.x, + ff_fc_momentum_x.y, + ff_fc_momentum_x.z, + ff_fc_momentum_y.x, + ff_fc_momentum_y.y, + ff_fc_momentum_y.z, + ff_fc_momentum_z.x, + ff_fc_momentum_z.y, + ff_fc_momentum_z.z, ) .await }); -- GitLab From 5fb96a38840c87d68d131832ac18cd68cc297078 Mon Sep 17 00:00:00 2001 From: Aaron Councilman <aaronjc4@illinois.edu> Date: Sat, 1 Mar 2025 14:06:37 -0600 Subject: [PATCH 6/6] Fix gpu schedule --- juno_samples/rodinia/cfd/src/gpu_euler.sch | 47 ++++++++++++------- .../rodinia/cfd/src/gpu_pre_euler.sch | 45 +++++++++++------- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch index 7f7ee42c..aed6115e 100644 --- a/juno_samples/rodinia/cfd/src/gpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch @@ -1,23 +1,34 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(euler); -gpu(auto.euler); - -inline(auto.euler); -inline(auto.euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); delete-uncalled(*); +gpu(copy_vars, compute_step_factor, compute_flux, time_step); -sroa[false](auto.euler); -dce(*); -float-collections(*); -dce(*); +simpl!(*); +ip-sroa[true](*); +sroa[true](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); +no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); +parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop); +unforkify(*); +float-collections(*); gcm(*); - diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch index 33c46dab..d91f1b00 100644 --- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch @@ -1,23 +1,32 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(pre_euler); -gpu(auto.pre_euler); - -inline(auto.pre_euler); -inline(auto.pre_euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step); delete-uncalled(*); +gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step); -sroa[false](auto.pre_euler); -dce(*); -float-collections(*); -dce(*); +simpl!(*); +ip-sroa[true](*); +sroa[true](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); +unforkify(*); +float-collections(*); gcm(*); - -- GitLab