From d5f4aacedf2ee39ce4fe2e254cad5197df092877 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 2 Mar 2025 10:45:57 -0600 Subject: [PATCH] CFD opt --- hercules_opt/src/sroa.rs | 11 ++++++++--- juno_samples/rodinia/cfd/src/cpu_euler.sch | 2 +- juno_samples/rodinia/cfd/src/gpu_euler.sch | 18 ++++++++++++++---- .../rodinia/cfd/src/gpu_pre_euler.sch | 19 +++++++++++++++++-- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/hercules_opt/src/sroa.rs b/hercules_opt/src/sroa.rs index e658ff88..2718f99d 100644 --- a/hercules_opt/src/sroa.rs +++ b/hercules_opt/src/sroa.rs @@ -447,7 +447,7 @@ pub fn sroa( field_map.insert(node, generate_reads(editor, types[&node], node)); } Node::Constant { id } => { - field_map.insert(node, generate_constant_fields(editor, id)); + field_map.insert(node, generate_constant_fields(editor, id, node)); to_delete.push(node); } _ => { @@ -1079,7 +1079,11 @@ pub fn generate_constant(editor: &mut FunctionEditor, typ: TypeID) -> ConstantID // Given a constant cnst adds node to the function which are the constant values of each field and // returns a list of pairs of indices and the node that holds that index -fn generate_constant_fields(editor: &mut FunctionEditor, cnst: ConstantID) -> IndexTree<NodeID> { +fn generate_constant_fields( + editor: &mut FunctionEditor, + cnst: ConstantID, + old_node: NodeID, +) -> IndexTree<NodeID> { let cs: Option<Vec<ConstantID>> = if let Some(cs) = editor.get_constant(cnst).try_product_fields() { Some(cs.into()) @@ -1090,13 +1094,14 @@ fn generate_constant_fields(editor: &mut FunctionEditor, cnst: ConstantID) -> In if let Some(cs) = cs { let mut fields = vec![]; for c in cs { - fields.push(generate_constant_fields(editor, c)); + fields.push(generate_constant_fields(editor, c, old_node)); } IndexTree::Node(fields) } else { let mut node = None; editor.edit(|mut edit| { node = Some(edit.add_node(Node::Constant { id: cnst })); + edit.sub_edit(old_node, node.unwrap()); Ok(edit) }); IndexTree::Leaf(node.expect("Add node cannot fail")) diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index 4e7ee3cf..7a284a9a 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -12,6 +12,7 @@ macro simpl!(X) { simpl!(*); inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); +no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); delete-uncalled(*); simpl!(*); ip-sroa[true](*); @@ -24,7 +25,6 @@ fixpoint { fork-guard-elim(*); } simpl!(*); -no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); unforkify(compute_flux@inner_loop); fork-tile[32, 0, false, false](compute_step_factor); diff --git a/juno_samples/rodinia/cfd/src/gpu_euler.sch b/juno_samples/rodinia/cfd/src/gpu_euler.sch index aed6115e..3700f79d 100644 --- a/juno_samples/rodinia/cfd/src/gpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_euler.sch @@ -12,6 +12,7 @@ macro simpl!(X) { simpl!(*); inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); +no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); delete-uncalled(*); gpu(copy_vars, compute_step_factor, compute_flux, time_step); @@ -26,9 +27,18 @@ fixpoint { fork-guard-elim(*); } simpl!(*); -no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); -parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop); +unforkify(compute_flux@inner_loop); + +fork-tile[32, 0, false, true](compute_step_factor); +fork-split(compute_step_factor); + +fork-tile[32, 0, false, true](compute_flux); +fork-split(compute_flux); + +fork-tile[32, 0, false, true](time_step); +fork-split(time_step); + +fork-tile[32, 0, false, true](copy_vars); +fork-split(copy_vars); -unforkify(*); -float-collections(*); gcm(*); diff --git a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch index d91f1b00..d6db675b 100644 --- a/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/gpu_pre_euler.sch @@ -12,6 +12,7 @@ macro simpl!(X) { simpl!(*); inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step); +no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res); delete-uncalled(*); gpu(copy_vars, compute_step_factor, compute_flux_contributions, compute_flux, time_step); @@ -26,7 +27,21 @@ fixpoint { fork-guard-elim(*); } simpl!(*); +unforkify(compute_flux@inner_loop); + +fork-tile[32, 0, false, true](compute_step_factor); +fork-split(compute_step_factor); + +fork-tile[32, 0, false, true](compute_flux_contributions); +fork-split(compute_flux_contributions); + +fork-tile[32, 0, false, true](compute_flux); +fork-split(compute_flux); + +fork-tile[32, 0, false, true](time_step); +fork-split(time_step); + +fork-tile[32, 0, false, true](copy_vars); +fork-split(copy_vars); -unforkify(*); -float-collections(*); gcm(*); -- GitLab