From 65ca4faf0b676522dc27690d481aec4c82861f52 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 17:20:07 -0600 Subject: [PATCH 1/5] bench for backprop but weird linking error --- Cargo.lock | 1 + juno_samples/rodinia/backprop/Cargo.toml | 10 ++ .../backprop/benches/backprop_bench.rs | 65 ++++++++ juno_samples/rodinia/backprop/src/lib.rs | 156 ++++++++++++++++++ juno_samples/rodinia/backprop/src/main.rs | 155 +---------------- 5 files changed, 233 insertions(+), 154 deletions(-) create mode 100644 juno_samples/rodinia/backprop/benches/backprop_bench.rs create mode 100644 juno_samples/rodinia/backprop/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index c872be3a..5916a17c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1238,6 +1238,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml index 729b3969..25185e09 100644 --- a/juno_samples/rodinia/backprop/Cargo.toml +++ b/juno_samples/rodinia/backprop/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_backprop" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -22,3 +25,10 @@ clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" rand = "0.9.0" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "backprop_bench" +harness = false \ No newline at end of file diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs new file mode 100644 index 00000000..40cad98f --- /dev/null +++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs @@ -0,0 +1,65 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("backprop"); + +fn backprop_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("backprop bench"); + group.sample_size(10); + + let mut rng = StdRng::seed_from_u64(7); + + let input_n = 65536; + let hidden_n = 16; + let output_n = 1; + + let mut input_vals = vec![0.0f32; input_n + 1]; + input_vals[0] = 1.0; + + // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 + let target = vec![0.1f32; output_n + 1]; + + let input_weights = (0..(input_n + 1) * (hidden_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + + let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; + let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; + + let mut r = runner!(backprop); + let input_vals = HerculesImmBox::from(&input_vals as &[f32]); + let target = HerculesImmBox::from(&target as &[f32]); + let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); + let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); + + group.bench_function("backprop bench", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + input_n as u64, + hidden_n as u64, + output_n as u64, + input_vals.to(), + input_weights.to(), + hidden_weights.to(), + target.to(), + input_prev_weights.to(), + hidden_prev_weights.to(), + ) + .await + }); + }) + }); +} + +criterion_group!(benches, backprop_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/backprop/src/lib.rs b/juno_samples/rodinia/backprop/src/lib.rs new file mode 100644 index 00000000..e2fc2ad5 --- /dev/null +++ b/juno_samples/rodinia/backprop/src/lib.rs @@ -0,0 +1,156 @@ +#![feature(concat_idents)] + +juno_build::juno!("backprop"); + +mod rust_backprop; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +use clap::Parser; + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct BackpropInputs { + pub layer_size: usize, +} + +fn run_backprop( + input_n: u64, + hidden_n: u64, + output_n: u64, + input_vals: &[f32], + input_weights: &[f32], + hidden_weights: &[f32], + target: &[f32], + input_prev_weights: &[f32], + hidden_prev_weights: &[f32], +) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) { + let input_vals = HerculesImmBox::from(input_vals); + let target = HerculesImmBox::from(target); + + let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); + let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); + + let mut runner = runner!(backprop); + let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) = + async_std::task::block_on(async { + runner + .run( + input_n, + hidden_n, + output_n, + input_vals.to(), + input_weights.to(), + hidden_weights.to(), + target.to(), + input_prev_weights.to(), + hidden_prev_weights.to(), + ) + .await + }); + let mut input_weights = HerculesMutBox::from(input_weights); + let mut hidden_weights = HerculesMutBox::from(hidden_weights); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights); + + ( + out_err, + hid_err, + input_weights.as_slice().to_vec(), + hidden_weights.as_slice().to_vec(), + input_prev_weights.as_slice().to_vec(), + hidden_prev_weights.as_slice().to_vec(), + ) +} + +fn compare_float(x: f32, y: f32) -> bool { + (x - y).abs() < 1e-5 +} + +fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +} + +pub fn backprop_harness(args: BackpropInputs) { + let BackpropInputs { layer_size } = args; + + let mut rng = StdRng::seed_from_u64(7); + + let input_n = layer_size; + let hidden_n = 16; + let output_n = 1; + + let mut input_vals = vec![0.0; input_n + 1]; + input_vals[0] = 1.0; + + // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 + let target = vec![0.1; output_n + 1]; + + let input_weights = (0..(input_n + 1) * (hidden_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + + let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; + let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; + + let ( + juno_out_err, + juno_hid_err, + juno_input_weights, + juno_hidden_weights, + juno_input_prev_weights, + juno_hidden_prev_weights, + ) = run_backprop( + input_n as u64, + hidden_n as u64, + output_n as u64, + &input_vals, + &input_weights, + &hidden_weights, + &target, + &input_prev_weights, + &hidden_prev_weights, + ); + + let ( + rust_out_err, + rust_hid_err, + rust_input_weights, + rust_hidden_weights, + rust_input_prev_weights, + rust_hidden_prev_weights, + ) = rust_backprop::backprop( + input_n, + hidden_n, + output_n, + &input_vals, + input_weights, + hidden_weights, + &target, + input_prev_weights, + hidden_prev_weights, + ); + + assert!(compare_float(juno_out_err, rust_out_err)); + assert!(compare_float(juno_hid_err, rust_hid_err)); + if !compare_floats(&juno_input_weights, &rust_input_weights) { + panic!("Input weights do not match after training"); + } + if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) { + panic!("Hidden weights do not match after training"); + } + if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) { + panic!("Input prev_weights do not match after training"); + } + if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) { + panic!("Hidden prev_weights do not match after training"); + } +} diff --git a/juno_samples/rodinia/backprop/src/main.rs b/juno_samples/rodinia/backprop/src/main.rs index fa80a7a5..bb0d13a1 100644 --- a/juno_samples/rodinia/backprop/src/main.rs +++ b/juno_samples/rodinia/backprop/src/main.rs @@ -1,159 +1,6 @@ -#![feature(concat_idents)] - -juno_build::juno!("backprop"); - -mod rust_backprop; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -use rand::rngs::StdRng; -use rand::{Rng, SeedableRng}; - use clap::Parser; -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct BackpropInputs { - layer_size: usize, -} - -fn run_backprop( - input_n: u64, - hidden_n: u64, - output_n: u64, - input_vals: &[f32], - input_weights: &[f32], - hidden_weights: &[f32], - target: &[f32], - input_prev_weights: &[f32], - hidden_prev_weights: &[f32], -) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) { - let input_vals = HerculesImmBox::from(input_vals); - let target = HerculesImmBox::from(target); - - let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); - let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); - let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); - let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); - - let mut runner = runner!(backprop); - let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) = - async_std::task::block_on(async { - runner - .run( - input_n, - hidden_n, - output_n, - input_vals.to(), - input_weights.to(), - hidden_weights.to(), - target.to(), - input_prev_weights.to(), - hidden_prev_weights.to(), - ) - .await - }); - let mut input_weights = HerculesMutBox::from(input_weights); - let mut hidden_weights = HerculesMutBox::from(hidden_weights); - let mut input_prev_weights = HerculesMutBox::from(input_prev_weights); - let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights); - - ( - out_err, - hid_err, - input_weights.as_slice().to_vec(), - hidden_weights.as_slice().to_vec(), - input_prev_weights.as_slice().to_vec(), - hidden_prev_weights.as_slice().to_vec(), - ) -} - -fn compare_float(x: f32, y: f32) -> bool { - (x - y).abs() < 1e-5 -} - -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) -} - -fn backprop_harness(args: BackpropInputs) { - let BackpropInputs { layer_size } = args; - - let mut rng = StdRng::seed_from_u64(7); - - let input_n = layer_size; - let hidden_n = 16; - let output_n = 1; - - let mut input_vals = vec![0.0; input_n + 1]; - input_vals[0] = 1.0; - - // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 - let target = vec![0.1; output_n + 1]; - - let input_weights = (0..(input_n + 1) * (hidden_n + 1)) - .map(|_| rng.random::<f32>()) - .collect::<Vec<_>>(); - let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) - .map(|_| rng.random::<f32>()) - .collect::<Vec<_>>(); - - let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; - let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; - - let ( - juno_out_err, - juno_hid_err, - juno_input_weights, - juno_hidden_weights, - juno_input_prev_weights, - juno_hidden_prev_weights, - ) = run_backprop( - input_n as u64, - hidden_n as u64, - output_n as u64, - &input_vals, - &input_weights, - &hidden_weights, - &target, - &input_prev_weights, - &hidden_prev_weights, - ); - - let ( - rust_out_err, - rust_hid_err, - rust_input_weights, - rust_hidden_weights, - rust_input_prev_weights, - rust_hidden_prev_weights, - ) = rust_backprop::backprop( - input_n, - hidden_n, - output_n, - &input_vals, - input_weights, - hidden_weights, - &target, - input_prev_weights, - hidden_prev_weights, - ); - - assert!(compare_float(juno_out_err, rust_out_err)); - assert!(compare_float(juno_hid_err, rust_hid_err)); - if !compare_floats(&juno_input_weights, &rust_input_weights) { - panic!("Input weights do not match after training"); - } - if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) { - panic!("Hidden weights do not match after training"); - } - if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) { - panic!("Input prev_weights do not match after training"); - } - if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) { - panic!("Hidden prev_weights do not match after training"); - } -} +use juno_backprop::{backprop_harness, BackpropInputs}; fn main() { let args = BackpropInputs::parse(); -- GitLab From cbd5e70e22944f444da8cb1bebf310eea9c13622 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 17:46:04 -0600 Subject: [PATCH 2/5] whyyyyy --- juno_samples/rodinia/backprop/benches/backprop_bench.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs index 40cad98f..17bdf6a7 100644 --- a/juno_samples/rodinia/backprop/benches/backprop_bench.rs +++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs @@ -7,6 +7,11 @@ use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, Herc juno_build::juno!("backprop"); +// We need this even though we don't use anything from the library because of +// Rust build scripts only linking static libraries into the library, and not +// into the benchmark binary. Ugh! +use juno_backprop::*; + fn backprop_bench(c: &mut Criterion) { let mut group = c.benchmark_group("backprop bench"); group.sample_size(10); -- GitLab From b4e9f25846d161122e147083508dde02dd975752 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 17:56:13 -0600 Subject: [PATCH 3/5] bfs bench --- Cargo.lock | 1 + juno_samples/rodinia/bfs/Cargo.toml | 10 +++++ juno_samples/rodinia/bfs/benches/bfs_bench.rs | 41 +++++++++++++++++ juno_samples/rodinia/bfs/src/lib.rs | 44 +++++++++++++++++++ juno_samples/rodinia/bfs/src/main.rs | 43 +----------------- 5 files changed, 97 insertions(+), 42 deletions(-) create mode 100644 juno_samples/rodinia/bfs/benches/bfs_bench.rs create mode 100644 juno_samples/rodinia/bfs/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 5916a17c..81394ef6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1252,6 +1252,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/bfs/Cargo.toml b/juno_samples/rodinia/bfs/Cargo.toml index 2ae6c8c0..34b6f5ce 100644 --- a/juno_samples/rodinia/bfs/Cargo.toml +++ b/juno_samples/rodinia/bfs/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_bfs" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "bfs_bench" +harness = false diff --git a/juno_samples/rodinia/bfs/benches/bfs_bench.rs b/juno_samples/rodinia/bfs/benches/bfs_bench.rs new file mode 100644 index 00000000..bf39a0fc --- /dev/null +++ b/juno_samples/rodinia/bfs/benches/bfs_bench.rs @@ -0,0 +1,41 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo}; + +juno_build::juno!("bfs"); + +use juno_bfs::graph_parser::*; + +fn bfs_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("bfs bench"); + + let mut r = runner!(bfs); + + let input = "data/graph4096.txt"; + let (nodes, source, edges) = parse_graph(input.into()); + let n = nodes.len() as u64; + let m = edges.len() as u64; + let nodes = HerculesImmBox::from(&nodes as &[Node]); + let edges = HerculesImmBox::from(&edges as &[u32]); + group.bench_function("bfs bench 4096", |b| { + b.iter(|| { + async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); + }) + }); + + let input = "data/graph65536.txt"; + let (nodes, source, edges) = parse_graph(input.into()); + let n = nodes.len() as u64; + let m = edges.len() as u64; + let nodes = HerculesImmBox::from(&nodes as &[Node]); + let edges = HerculesImmBox::from(&edges as &[u32]); + group.bench_function("bfs bench 65536", |b| { + b.iter(|| { + async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); + }) + }); +} + +criterion_group!(benches, bfs_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/bfs/src/lib.rs b/juno_samples/rodinia/bfs/src/lib.rs new file mode 100644 index 00000000..218e9bb0 --- /dev/null +++ b/juno_samples/rodinia/bfs/src/lib.rs @@ -0,0 +1,44 @@ +#![feature(concat_idents)] +pub mod graph_parser; +mod rust_bfs; + +use graph_parser::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; + +use clap::Parser; + +juno_build::juno!("bfs"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct BFSInputs { + pub input: String, +} + +fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> { + let n = nodes.len() as u64; + let m = edges.len() as u64; + + let nodes = HerculesImmBox::from(nodes); + let edges = HerculesImmBox::from(edges); + + let mut runner = runner!(bfs); + + HerculesMutBox::from(async_std::task::block_on(async { + runner.run(n, m, nodes.to(), source, edges.to()).await + })) + .as_slice() + .to_vec() +} + +pub fn bfs_harness(args: BFSInputs) { + let BFSInputs { input } = args; + + let (nodes, source, edges) = parse_graph(input); + + let costs_juno = run_bfs(&nodes, source, &edges); + let costs_ref = rust_bfs::bfs(&nodes, source, &edges); + + assert_eq!(costs_juno, costs_ref); +} diff --git a/juno_samples/rodinia/bfs/src/main.rs b/juno_samples/rodinia/bfs/src/main.rs index 21e48c35..0ad23b00 100644 --- a/juno_samples/rodinia/bfs/src/main.rs +++ b/juno_samples/rodinia/bfs/src/main.rs @@ -1,47 +1,6 @@ -#![feature(concat_idents)] -mod graph_parser; -mod rust_bfs; - -use graph_parser::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; - use clap::Parser; -juno_build::juno!("bfs"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct BFSInputs { - input: String, -} - -fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> { - let n = nodes.len() as u64; - let m = edges.len() as u64; - - let nodes = HerculesImmBox::from(nodes); - let edges = HerculesImmBox::from(edges); - - let mut runner = runner!(bfs); - - HerculesMutBox::from(async_std::task::block_on(async { - runner.run(n, m, nodes.to(), source, edges.to()).await - })) - .as_slice() - .to_vec() -} - -fn bfs_harness(args: BFSInputs) { - let BFSInputs { input } = args; - - let (nodes, source, edges) = parse_graph(input); - - let costs_juno = run_bfs(&nodes, source, &edges); - let costs_ref = rust_bfs::bfs(&nodes, source, &edges); - - assert_eq!(costs_juno, costs_ref); -} +use juno_bfs::{bfs_harness, BFSInputs}; fn main() { let args = BFSInputs::parse(); -- GitLab From 5f96afc726c24c14f6b612a74b01fa47b45f1ab5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 21:20:33 -0600 Subject: [PATCH 4/5] cfd bench --- Cargo.lock | 1 + juno_samples/rodinia/cfd/Cargo.toml | 10 + juno_samples/rodinia/cfd/benches/cfd_bench.rs | 130 ++++++++++ juno_samples/rodinia/cfd/src/lib.rs | 236 ++++++++++++++++++ juno_samples/rodinia/cfd/src/main.rs | 235 +---------------- 5 files changed, 378 insertions(+), 234 deletions(-) create mode 100644 juno_samples/rodinia/cfd/benches/cfd_bench.rs create mode 100644 juno_samples/rodinia/cfd/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 81394ef6..170a7f3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,6 +1297,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml index 542ca7a2..6720b527 100644 --- a/juno_samples/rodinia/cfd/Cargo.toml +++ b/juno_samples/rodinia/cfd/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_cfd" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "cfd_bench" +harness = false diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs new file mode 100644 index 00000000..fd614b42 --- /dev/null +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -0,0 +1,130 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +use juno_cfd::*; + +fn cfd_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("cfd bench"); + + let mut r = runner!(euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); + + let mut r = runner!(pre_euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench pre-euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); +} + +criterion_group!(benches, cfd_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs new file mode 100644 index 00000000..39384c0d --- /dev/null +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -0,0 +1,236 @@ +#![feature(concat_idents)] +mod rust_cfd; +mod setup; + +use clap::Parser; + +pub use crate::setup::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct CFDInputs { + pub data_file: String, + pub iterations: usize, + pub block_size: usize, + #[clap(short = None, long = Some("pre-euler"))] + pub pre_euler: bool, +} + +fn run_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(euler); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn run_pre_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(pre_euler); + + let variables = variables.to(); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables, + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn compare_float(x: f32, y: f32) -> bool { + (x - y).abs() < 1e-5 +} + +fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +} + +pub fn cfd_harness(args: CFDInputs) { + let CFDInputs { + data_file, + iterations, + block_size, + pre_euler, + } = args; + + assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); + + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + + let variables = initialize_variables(nelr, ff_variable.as_slice()); + + let res_juno = if pre_euler { + run_pre_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + run_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + let res_rust = if pre_euler { + rust_cfd::pre_euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + rust_cfd::euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + + if !compare_floats(&res_juno, res_rust.as_slice()) { + assert_eq!(res_juno.len(), res_rust.as_slice().len()); + panic!("Mismatch in results"); + } +} diff --git a/juno_samples/rodinia/cfd/src/main.rs b/juno_samples/rodinia/cfd/src/main.rs index fab241fa..277a3edb 100644 --- a/juno_samples/rodinia/cfd/src/main.rs +++ b/juno_samples/rodinia/cfd/src/main.rs @@ -1,239 +1,6 @@ -#![feature(concat_idents)] -mod rust_cfd; -mod setup; - use clap::Parser; -use crate::setup::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -juno_build::juno!("euler"); -juno_build::juno!("pre_euler"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct CFDInputs { - data_file: String, - iterations: usize, - block_size: usize, - #[clap(short = None, long = Some("pre-euler"))] - pre_euler: bool, -} - -fn run_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(euler); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables.to(), - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn run_pre_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(pre_euler); - - let variables = variables.to(); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables, - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn compare_float(x: f32, y: f32) -> bool { - (x - y).abs() < 1e-5 -} - -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) -} - -fn cfd_harness(args: CFDInputs) { - let CFDInputs { - data_file, - iterations, - block_size, - pre_euler, - } = args; - - assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); - - let FarFieldConditions { - ff_variable, - ff_fc_momentum_x, - ff_fc_momentum_y, - ff_fc_momentum_z, - ff_fc_density_energy, - } = set_far_field_conditions(); - - let GeometryData { - nelr, - areas, - elements_surrounding_elements, - normals, - } = read_domain_geometry(data_file, block_size); - - let variables = initialize_variables(nelr, ff_variable.as_slice()); - - let res_juno = if pre_euler { - run_pre_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - run_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - let res_rust = if pre_euler { - rust_cfd::pre_euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - rust_cfd::euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - - if !compare_floats(&res_juno, res_rust.as_slice()) { - assert_eq!(res_juno.len(), res_rust.as_slice().len()); - panic!("Mismatch in results"); - } -} +use juno_cfd::{cfd_harness, CFDInputs}; fn main() { let args = CFDInputs::parse(); -- GitLab From 747b2f41304c0e5e7596bdcde35e45f5c7f73fd3 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 25 Feb 2025 21:26:13 -0600 Subject: [PATCH 5/5] srad bench --- Cargo.lock | 1 + juno_samples/rodinia/srad/Cargo.toml | 10 ++ .../rodinia/srad/benches/srad_bench.rs | 62 +++++++++ juno_samples/rodinia/srad/src/lib.rs | 123 ++++++++++++++++++ juno_samples/rodinia/srad/src/main.rs | 122 +---------------- 5 files changed, 197 insertions(+), 121 deletions(-) create mode 100644 juno_samples/rodinia/srad/benches/srad_bench.rs create mode 100644 juno_samples/rodinia/srad/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 170a7f3e..61cde7f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1493,6 +1493,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/srad/Cargo.toml b/juno_samples/rodinia/srad/Cargo.toml index e41a8871..facf8c3b 100644 --- a/juno_samples/rodinia/srad/Cargo.toml +++ b/juno_samples/rodinia/srad/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_srad" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "srad_bench" +harness = false diff --git a/juno_samples/rodinia/srad/benches/srad_bench.rs b/juno_samples/rodinia/srad/benches/srad_bench.rs new file mode 100644 index 00000000..d3274540 --- /dev/null +++ b/juno_samples/rodinia/srad/benches/srad_bench.rs @@ -0,0 +1,62 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("srad"); + +use juno_srad::*; + +fn srad_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("srad bench"); + + let mut r = runner!(srad); + let niter = 100; + let lambda = 0.5; + let nrows = 502; + let ncols = 458; + let image = "data/image.pgm".to_string(); + let Image { + image: image_ori, + max, + rows: image_ori_rows, + cols: image_ori_cols, + } = read_graphics(image); + let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); + let mut image_h = HerculesMutBox::from(image.clone()); + let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); + let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); + let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); + let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); + // Fix boundary conditions + iN[0] = 0; + iS[nrows - 1] = (nrows - 1) as i32; + jW[0] = 0; + jE[ncols - 1] = (ncols - 1) as i32; + let iN_h = HerculesImmBox::from(iN.as_slice()); + let iS_h = HerculesImmBox::from(iS.as_slice()); + let jW_h = HerculesImmBox::from(jW.as_slice()); + let jE_h = HerculesImmBox::from(jE.as_slice()); + group.bench_function("srad bench", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nrows as u64, + ncols as u64, + niter as u64, + image_h.to(), + iN_h.to(), + iS_h.to(), + jW_h.to(), + jE_h.to(), + max, + lambda, + ) + .await + }); + }) + }); +} + +criterion_group!(benches, srad_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/srad/src/lib.rs b/juno_samples/rodinia/srad/src/lib.rs new file mode 100644 index 00000000..d6366007 --- /dev/null +++ b/juno_samples/rodinia/srad/src/lib.rs @@ -0,0 +1,123 @@ +#![feature(concat_idents)] +mod graphics; +mod rust_srad; + +pub use graphics::*; + +use clap::Parser; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("srad"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct SRADInputs { + pub niter: usize, + pub lambda: f32, + pub nrows: usize, + pub ncols: usize, + pub image: String, + #[clap(short, long)] + pub output: Option<String>, + #[clap(short, long)] + pub verify: bool, + #[clap(long = "output-verify", value_name = "PATH")] + pub output_verify: Option<String>, +} + +pub fn srad_harness(args: SRADInputs) { + async_std::task::block_on(async { + let SRADInputs { + niter, + lambda, + nrows, + ncols, + image, + output, + verify, + output_verify, + } = args; + + let Image { + image: image_ori, + max, + rows: image_ori_rows, + cols: image_ori_cols, + } = read_graphics(image); + let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); + let mut image_h = HerculesMutBox::from(image.clone()); + + let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); + let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); + let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); + let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); + + // Fix boundary conditions + iN[0] = 0; + iS[nrows - 1] = (nrows - 1) as i32; + jW[0] = 0; + jE[ncols - 1] = (ncols - 1) as i32; + + let iN_h = HerculesImmBox::from(iN.as_slice()); + let iS_h = HerculesImmBox::from(iS.as_slice()); + let jW_h = HerculesImmBox::from(jW.as_slice()); + let jE_h = HerculesImmBox::from(jE.as_slice()); + + let mut runner = runner!(srad); + let result: Vec<f32> = HerculesMutBox::from( + runner + .run( + nrows as u64, + ncols as u64, + niter as u64, + image_h.to(), + iN_h.to(), + iS_h.to(), + jW_h.to(), + jE_h.to(), + max, + lambda, + ) + .await, + ) + .as_slice() + .to_vec(); + + if let Some(output) = output { + write_graphics(output, &result, nrows, ncols, max); + } + + if verify { + let mut rust_result = image; + rust_srad::srad( + nrows, + ncols, + niter, + &mut rust_result, + &iN, + &iS, + &jW, + &jE, + max, + lambda, + ); + + if let Some(output) = output_verify { + write_graphics(output, &rust_result, nrows, ncols, max); + } + + let max_diff = result + .iter() + .zip(rust_result.iter()) + .map(|(a, b)| (*a as i32 - *b as i32).abs()) + .max() + .unwrap_or(0); + assert!( + max_diff <= 1, + "Verification failed: maximum pixel difference of {} exceeds threshold of 1", + max_diff + ); + } + }) +} diff --git a/juno_samples/rodinia/srad/src/main.rs b/juno_samples/rodinia/srad/src/main.rs index 1b99b41a..87d1e7e8 100644 --- a/juno_samples/rodinia/srad/src/main.rs +++ b/juno_samples/rodinia/srad/src/main.rs @@ -1,126 +1,6 @@ -#![feature(concat_idents)] -mod graphics; -mod rust_srad; - -use graphics::*; - use clap::Parser; -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -juno_build::juno!("srad"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct SRADInputs { - niter: usize, - lambda: f32, - nrows: usize, - ncols: usize, - image: String, - #[clap(short, long)] - output: Option<String>, - #[clap(short, long)] - verify: bool, - #[clap(long = "output-verify", value_name = "PATH")] - output_verify: Option<String>, -} - -fn srad_harness(args: SRADInputs) { - async_std::task::block_on(async { - let SRADInputs { - niter, - lambda, - nrows, - ncols, - image, - output, - verify, - output_verify, - } = args; - - let Image { - image: image_ori, - max, - rows: image_ori_rows, - cols: image_ori_cols, - } = read_graphics(image); - let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); - let mut image_h = HerculesMutBox::from(image.clone()); - - let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); - let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); - let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); - let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); - - // Fix boundary conditions - iN[0] = 0; - iS[nrows - 1] = (nrows - 1) as i32; - jW[0] = 0; - jE[ncols - 1] = (ncols - 1) as i32; - - let iN_h = HerculesImmBox::from(iN.as_slice()); - let iS_h = HerculesImmBox::from(iS.as_slice()); - let jW_h = HerculesImmBox::from(jW.as_slice()); - let jE_h = HerculesImmBox::from(jE.as_slice()); - - let mut runner = runner!(srad); - let result: Vec<f32> = HerculesMutBox::from( - runner - .run( - nrows as u64, - ncols as u64, - niter as u64, - image_h.to(), - iN_h.to(), - iS_h.to(), - jW_h.to(), - jE_h.to(), - max, - lambda, - ) - .await, - ) - .as_slice() - .to_vec(); - - if let Some(output) = output { - write_graphics(output, &result, nrows, ncols, max); - } - - if verify { - let mut rust_result = image; - rust_srad::srad( - nrows, - ncols, - niter, - &mut rust_result, - &iN, - &iS, - &jW, - &jE, - max, - lambda, - ); - - if let Some(output) = output_verify { - write_graphics(output, &rust_result, nrows, ncols, max); - } - - let max_diff = result - .iter() - .zip(rust_result.iter()) - .map(|(a, b)| (*a as i32 - *b as i32).abs()) - .max() - .unwrap_or(0); - assert!( - max_diff <= 1, - "Verification failed: maximum pixel difference of {} exceeds threshold of 1", - max_diff - ); - } - }) -} +use juno_srad::*; fn main() { let args = SRADInputs::parse(); -- GitLab