From a3483234d1a5b7607cea444d8be67180bbc36816 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 2 Mar 2025 09:59:23 -0600 Subject: [PATCH] Multi-core cfd --- juno_samples/rodinia/cfd/src/cpu_euler.sch | 28 ++++++++++++++-- .../rodinia/cfd/src/cpu_pre_euler.sch | 33 ++++++++++++++++++- juno_samples/rodinia/cfd/src/lib.rs | 1 + juno_samples/rodinia/cfd/src/pre_euler.jn | 18 +++++----- 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index 4cf320a6..4e7ee3cf 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -25,7 +25,31 @@ fixpoint { } simpl!(*); no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res); -parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop); +unforkify(compute_flux@inner_loop); -unforkify(*); +fork-tile[32, 0, false, false](compute_step_factor); +let split = fork-split(compute_step_factor); +let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); +fork-coalesce(compute_step_factor, compute_step_factor_body); +simpl!(compute_step_factor, compute_step_factor_body); + +fork-tile[32, 0, false, false](compute_flux); +let split = fork-split(compute_flux); +let compute_flux_body = outline(split._6_compute_flux.fj1); +fork-coalesce(compute_flux, compute_flux_body); +simpl!(compute_flux, compute_flux_body); + +fork-tile[32, 0, false, false](time_step); +let split = fork-split(time_step); +let time_step_body = outline(split._7_time_step.fj1); +fork-coalesce(time_step, time_step_body); +simpl!(time_step, time_step_body); + +fork-tile[32, 0, false, false](copy_vars); +let split = fork-split(copy_vars); +let copy_vars_body = outline(split._8_copy_vars.fj1); +fork-coalesce(copy_vars, copy_vars_body); +simpl!(copy_vars, copy_vars_body); + +unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body); gcm(*); diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch index 14eb6906..518c656d 100644 --- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch @@ -24,7 +24,38 @@ fixpoint { fork-guard-elim(*); } simpl!(*); +no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res); +unforkify(compute_flux@inner_loop); -unforkify(*); +fork-tile[32, 0, false, false](compute_step_factor); +let split = fork-split(compute_step_factor); +let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); +fork-coalesce(compute_step_factor, compute_step_factor_body); +simpl!(compute_step_factor, compute_step_factor_body); +fork-tile[32, 0, false, false](compute_flux_contributions); +let split = fork-split(compute_flux_contributions); +let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1); +fork-coalesce(compute_flux_contributions, compute_flux_contributions_body); +simpl!(compute_flux_contributions, compute_flux_contributions_body); + +fork-tile[32, 0, false, false](compute_flux); +let split = fork-split(compute_flux); +let compute_flux_body = outline(split._7_compute_flux.fj1); +fork-coalesce(compute_flux, compute_flux_body); +simpl!(compute_flux, compute_flux_body); + +fork-tile[32, 0, false, false](time_step); +let split = fork-split(time_step); +let time_step_body = outline(split._8_time_step.fj1); +fork-coalesce(time_step, time_step_body); +simpl!(time_step, time_step_body); + +fork-tile[32, 0, false, false](copy_vars); +let split = fork-split(copy_vars); +let copy_vars_body = outline(split._9_copy_vars.fj1); +fork-coalesce(copy_vars, copy_vars_body); +simpl!(copy_vars, copy_vars_body); + +unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body); gcm(*); diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs index f9a5dd76..d61df4c5 100644 --- a/juno_samples/rodinia/cfd/src/lib.rs +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -237,6 +237,7 @@ pub fn cfd_harness(args: CFDInputs) { } = read_domain_geometry(data_file, block_size); let variables = initialize_variables(nelr, &ff_variable); + println!("Running CFD with nelr = {}.", nelr); let res_juno = if pre_euler { run_pre_euler( diff --git a/juno_samples/rodinia/cfd/src/pre_euler.jn b/juno_samples/rodinia/cfd/src/pre_euler.jn index c200f2db..979c2e9a 100644 --- a/juno_samples/rodinia/cfd/src/pre_euler.jn +++ b/juno_samples/rodinia/cfd/src/pre_euler.jn @@ -58,7 +58,7 @@ fn compute_speed_of_sound(density: f32, pressure: f32) -> f32 { } fn compute_step_factor<nelr: usize>(variables: Variables::<nelr>, areas: f32[nelr]) -> f32[nelr] { - let step_factors : f32[nelr]; + @res let step_factors : f32[nelr]; for i in 0..nelr { let density = variables.density[i]; @@ -109,10 +109,10 @@ fn compute_flux_contribution( fn compute_flux_contributions<nelr: usize>( variables: Variables::<nelr>, ) -> (Momentum::<nelr>, Momentum::<nelr>, Momentum::<nelr>, Momentum::<nelr>) { - let fc_momentum_x: Momentum::<nelr>; - let fc_momentum_y: Momentum::<nelr>; - let fc_momentum_z: Momentum::<nelr>; - let fc_density_energy: Momentum::<nelr>; + @res let fc_momentum_x: Momentum::<nelr>; + @res let fc_momentum_y: Momentum::<nelr>; + @res let fc_momentum_z: Momentum::<nelr>; + @res let fc_density_energy: Momentum::<nelr>; for i in 0..nelr { let density_i = variables.density[i]; @@ -167,9 +167,9 @@ fn compute_flux<nelr: usize>( ff_fc_momentum_z: float3, ) -> Variables::<nelr> { const smoothing_coefficient : f32 = 0.2; - let fluxes: Variables::<nelr>; + @res let fluxes: Variables::<nelr>; - for i in 0..nelr { + @outer_loop for i in 0..nelr { let density_i = variables.density[i]; let momentum_i = float3 { x: variables.momentum.x[i], @@ -201,7 +201,7 @@ fn compute_flux<nelr: usize>( let flux_i_momentum = float3 { x: 0.0, y: 0.0, z: 0.0 }; let flux_i_density_energy : f32 = 0.0; - for j in 0..NNB { + @inner_loop for j in 0..NNB { let nb = elements_surrounding_elements[j, i]; let normal = float3 { x: normals.x[j, i], @@ -328,7 +328,7 @@ fn time_step<nelr: usize>( } fn copy_vars<nelr: usize>(variables: Variables::<nelr>) -> Variables::<nelr> { - let result : Variables::<nelr>; + @res let result : Variables::<nelr>; for i in 0..nelr { result.density[i] = variables.density[i]; -- GitLab