From a3483234d1a5b7607cea444d8be67180bbc36816 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 2 Mar 2025 09:59:23 -0600
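Subject: [PATCH] Multi-core cfd

Rework the CPU schedules of the Rodinia cfd sample (cpu_euler.sch and
cpu_pre_euler.sch) so that they no longer end with a blanket
unforkify(*); cpu_euler.sch also drops its parallel-reduce call. Each
kernel (compute_step_factor, compute_flux, time_step, copy_vars, plus
compute_flux_contributions for pre_euler) is instead tiled with
fork-tile[32, 0, false, false], split with fork-split, and has the
resulting fj1 fork-join outlined into a *_body function. Only
compute_flux@inner_loop and the outlined *_body functions are
unforkified.

To support this, pre_euler.jn gains @res labels on the result
allocations and @outer_loop/@inner_loop labels on the compute_flux
loops, and cpu_pre_euler.sch marks the @res allocations no-memset. The
harness in lib.rs also prints nelr before launching the run.
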
---
 juno_samples/rodinia/cfd/src/cpu_euler.sch    | 28 ++++++++++++++--
 .../rodinia/cfd/src/cpu_pre_euler.sch         | 33 ++++++++++++++++++-
 juno_samples/rodinia/cfd/src/lib.rs           |  1 +
 juno_samples/rodinia/cfd/src/pre_euler.jn     | 18 +++++-----
 4 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index 4cf320a6..4e7ee3cf 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -25,7 +25,31 @@ fixpoint {
 }
 simpl!(*);
 no-memset(compute_step_factor@res, compute_flux@res, copy_vars@res);
-parallel-reduce(time_step, copy_vars, compute_flux@outer_loop \ compute_flux@inner_loop);
+unforkify(compute_flux@inner_loop);
 
-unforkify(*);
+fork-tile[32, 0, false, false](compute_step_factor);
+let split = fork-split(compute_step_factor);
+let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
+fork-coalesce(compute_step_factor, compute_step_factor_body);
+simpl!(compute_step_factor, compute_step_factor_body);
+
+fork-tile[32, 0, false, false](compute_flux);
+let split = fork-split(compute_flux);
+let compute_flux_body = outline(split._6_compute_flux.fj1);
+fork-coalesce(compute_flux, compute_flux_body);
+simpl!(compute_flux, compute_flux_body);
+
+fork-tile[32, 0, false, false](time_step);
+let split = fork-split(time_step);
+let time_step_body = outline(split._7_time_step.fj1);
+fork-coalesce(time_step, time_step_body);
+simpl!(time_step, time_step_body);
+
+fork-tile[32, 0, false, false](copy_vars);
+let split = fork-split(copy_vars);
+let copy_vars_body = outline(split._8_copy_vars.fj1);
+fork-coalesce(copy_vars, copy_vars_body);
+simpl!(copy_vars, copy_vars_body);
+
+unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body);
 gcm(*);
diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
index 14eb6906..518c656d 100644
--- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
@@ -24,7 +24,38 @@ fixpoint {
   fork-guard-elim(*);
 }
 simpl!(*);
+no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
+unforkify(compute_flux@inner_loop);
 
-unforkify(*);
+fork-tile[32, 0, false, false](compute_step_factor);
+let split = fork-split(compute_step_factor);
+let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
+fork-coalesce(compute_step_factor, compute_step_factor_body);
+simpl!(compute_step_factor, compute_step_factor_body);
 
+fork-tile[32, 0, false, false](compute_flux_contributions);
+let split = fork-split(compute_flux_contributions);
+let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
+fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
+simpl!(compute_flux_contributions, compute_flux_contributions_body);
+
+fork-tile[32, 0, false, false](compute_flux);
+let split = fork-split(compute_flux);
+let compute_flux_body = outline(split._7_compute_flux.fj1);
+fork-coalesce(compute_flux, compute_flux_body);
+simpl!(compute_flux, compute_flux_body);
+
+fork-tile[32, 0, false, false](time_step);
+let split = fork-split(time_step);
+let time_step_body = outline(split._8_time_step.fj1);
+fork-coalesce(time_step, time_step_body);
+simpl!(time_step, time_step_body);
+
+fork-tile[32, 0, false, false](copy_vars);
+let split = fork-split(copy_vars);
+let copy_vars_body = outline(split._9_copy_vars.fj1);
+fork-coalesce(copy_vars, copy_vars_body);
+simpl!(copy_vars, copy_vars_body);
+
+unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body);
 gcm(*);
diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs
index f9a5dd76..d61df4c5 100644
--- a/juno_samples/rodinia/cfd/src/lib.rs
+++ b/juno_samples/rodinia/cfd/src/lib.rs
@@ -237,6 +237,7 @@ pub fn cfd_harness(args: CFDInputs) {
     } = read_domain_geometry(data_file, block_size);
 
     let variables = initialize_variables(nelr, &ff_variable);
+    println!("Running CFD with nelr = {}.", nelr);
 
     let res_juno = if pre_euler {
         run_pre_euler(
diff --git a/juno_samples/rodinia/cfd/src/pre_euler.jn b/juno_samples/rodinia/cfd/src/pre_euler.jn
index c200f2db..979c2e9a 100644
--- a/juno_samples/rodinia/cfd/src/pre_euler.jn
+++ b/juno_samples/rodinia/cfd/src/pre_euler.jn
@@ -58,7 +58,7 @@ fn compute_speed_of_sound(density: f32, pressure: f32) -> f32 {
 }
 
 fn compute_step_factor<nelr: usize>(variables: Variables::<nelr>, areas: f32[nelr]) -> f32[nelr] {
-  let step_factors : f32[nelr];
+  @res let step_factors : f32[nelr];
 
   for i in 0..nelr {
     let density = variables.density[i];
@@ -109,10 +109,10 @@ fn compute_flux_contribution(
 fn compute_flux_contributions<nelr: usize>(
   variables: Variables::<nelr>,
 ) -> (Momentum::<nelr>, Momentum::<nelr>, Momentum::<nelr>, Momentum::<nelr>) {
-  let fc_momentum_x: Momentum::<nelr>;
-  let fc_momentum_y: Momentum::<nelr>;
-  let fc_momentum_z: Momentum::<nelr>;
-  let fc_density_energy: Momentum::<nelr>;
+  @res let fc_momentum_x: Momentum::<nelr>;
+  @res let fc_momentum_y: Momentum::<nelr>;
+  @res let fc_momentum_z: Momentum::<nelr>;
+  @res let fc_density_energy: Momentum::<nelr>;
 
   for i in 0..nelr {
     let density_i = variables.density[i];
@@ -167,9 +167,9 @@ fn compute_flux<nelr: usize>(
   ff_fc_momentum_z: float3,
 ) -> Variables::<nelr> {
   const smoothing_coefficient : f32 = 0.2;
-  let fluxes: Variables::<nelr>;
+  @res let fluxes: Variables::<nelr>;
 
-  for i in 0..nelr {
+  @outer_loop for i in 0..nelr {
     let density_i = variables.density[i];
 
     let momentum_i = float3 { x: variables.momentum.x[i],
@@ -201,7 +201,7 @@ fn compute_flux<nelr: usize>(
     let flux_i_momentum = float3 { x: 0.0, y: 0.0, z: 0.0 };
     let flux_i_density_energy : f32 = 0.0;
 
-    for j in 0..NNB {
+    @inner_loop for j in 0..NNB {
       let nb = elements_surrounding_elements[j, i];
       let normal = float3 {
         x: normals.x[j, i],
@@ -328,7 +328,7 @@ fn time_step<nelr: usize>(
 }
 
 fn copy_vars<nelr: usize>(variables: Variables::<nelr>) -> Variables::<nelr> {
-  let result : Variables::<nelr>;
+  @res let result : Variables::<nelr>;
 
   for i in 0..nelr {
     result.density[i] = variables.density[i];
-- 
GitLab