From 5f96afc726c24c14f6b612a74b01fa47b45f1ab5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 21:20:33 -0600 Subject: [PATCH] cfd bench --- Cargo.lock | 1 + juno_samples/rodinia/cfd/Cargo.toml | 10 + juno_samples/rodinia/cfd/benches/cfd_bench.rs | 130 ++++++++++ juno_samples/rodinia/cfd/src/lib.rs | 236 ++++++++++++++++++ juno_samples/rodinia/cfd/src/main.rs | 235 +---------------- 5 files changed, 378 insertions(+), 234 deletions(-) create mode 100644 juno_samples/rodinia/cfd/benches/cfd_bench.rs create mode 100644 juno_samples/rodinia/cfd/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 81394ef6..170a7f3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,6 +1297,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml index 542ca7a2..6720b527 100644 --- a/juno_samples/rodinia/cfd/Cargo.toml +++ b/juno_samples/rodinia/cfd/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_cfd" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "cfd_bench" +harness = false diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs new file mode 100644 index 00000000..fd614b42 --- /dev/null +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -0,0 +1,130 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +use juno_cfd::*; + +fn cfd_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("cfd bench"); + + let mut r = runner!(euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); + + let mut r = runner!(pre_euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench pre-euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); +} + +criterion_group!(benches, cfd_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs new file mode 100644 index 00000000..39384c0d --- /dev/null +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -0,0 +1,236 @@ +#![feature(concat_idents)] +mod rust_cfd; +mod setup; + +use clap::Parser; + +pub use crate::setup::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct CFDInputs { + pub data_file: String, + pub iterations: usize, + pub block_size: usize, + #[clap(short = None, long = Some("pre-euler"))] + pub pre_euler: bool, +} + +fn run_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(euler); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn run_pre_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(pre_euler); + + let variables = variables.to(); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables, + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn compare_float(x: f32, y: f32) -> bool { + (x - y).abs() < 1e-5 +} + +fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +} + +pub fn cfd_harness(args: CFDInputs) { + let CFDInputs { + data_file, + iterations, + block_size, + pre_euler, + } = args; + + assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); + + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + + let variables = initialize_variables(nelr, ff_variable.as_slice()); + + let res_juno = if pre_euler { + run_pre_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + run_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + let res_rust = if pre_euler { + rust_cfd::pre_euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + rust_cfd::euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + + if !compare_floats(&res_juno, res_rust.as_slice()) { + assert_eq!(res_juno.len(), res_rust.as_slice().len()); + panic!("Mismatch in results"); + } +} diff --git a/juno_samples/rodinia/cfd/src/main.rs b/juno_samples/rodinia/cfd/src/main.rs index fab241fa..277a3edb 100644 --- a/juno_samples/rodinia/cfd/src/main.rs +++ b/juno_samples/rodinia/cfd/src/main.rs @@ -1,239 +1,6 @@ -#![feature(concat_idents)] -mod rust_cfd; -mod setup; - use clap::Parser; -use crate::setup::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -juno_build::juno!("euler"); -juno_build::juno!("pre_euler"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct CFDInputs { - data_file: String, - iterations: usize, - block_size: usize, - #[clap(short = None, long = Some("pre-euler"))] - pre_euler: bool, -} - -fn run_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(euler); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables.to(), - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn run_pre_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(pre_euler); - - let variables = variables.to(); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables, - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn compare_float(x: f32, y: f32) -> bool { - (x - y).abs() < 1e-5 -} - -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) -} - -fn cfd_harness(args: CFDInputs) { - let CFDInputs { - data_file, - iterations, - block_size, - pre_euler, - } = args; - - assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); - - let FarFieldConditions { - ff_variable, - ff_fc_momentum_x, - ff_fc_momentum_y, - ff_fc_momentum_z, - ff_fc_density_energy, - } = set_far_field_conditions(); - - let GeometryData { - nelr, - areas, - elements_surrounding_elements, - normals, - } = read_domain_geometry(data_file, block_size); - - let variables = initialize_variables(nelr, ff_variable.as_slice()); - - let res_juno = if pre_euler { - run_pre_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - run_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - let res_rust = if pre_euler { - rust_cfd::pre_euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - rust_cfd::euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - - if !compare_floats(&res_juno, res_rust.as_slice()) { - assert_eq!(res_juno.len(), res_rust.as_slice().len()); - panic!("Mismatch in results"); - } -} +use juno_cfd::{cfd_harness, CFDInputs}; fn main() { let args = CFDInputs::parse(); -- GitLab