diff --git a/Cargo.lock b/Cargo.lock index c872be3ad7993859e29e1482866ce49865e3cc29..61cde7f161b7c4177cb781addeb2f484af3b7477 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1238,6 +1238,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", @@ -1251,6 +1252,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", @@ -1295,6 +1297,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", @@ -1490,6 +1493,7 @@ version = "0.1.0" dependencies = [ "async-std", "clap", + "criterion", "hercules_rt", "juno_build", "nom 8.0.0", diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml index 729b3969c5d9acbbdc7d50ca056bc24ab20dd9f7..25185e0944d171c748f4d5f8e10e9646e5cce9eb 100644 --- a/juno_samples/rodinia/backprop/Cargo.toml +++ b/juno_samples/rodinia/backprop/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_backprop" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -22,3 +25,10 @@ clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" rand = "0.9.0" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "backprop_bench" +harness = false \ No newline at end of file diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..17bdf6a7766362e79f3536c6cecf4b5282a160c0 --- /dev/null +++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs @@ -0,0 +1,70 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("backprop"); + +// We need this even though we don't use anything from the library because of +// Rust build scripts only linking static libraries into the library, and not +// into the benchmark binary. Ugh! +use juno_backprop::*; + +fn backprop_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("backprop bench"); + group.sample_size(10); + + let mut rng = StdRng::seed_from_u64(7); + + let input_n = 65536; + let hidden_n = 16; + let output_n = 1; + + let mut input_vals = vec![0.0f32; input_n + 1]; + input_vals[0] = 1.0; + + // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 + let target = vec![0.1f32; output_n + 1]; + + let input_weights = (0..(input_n + 1) * (hidden_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + + let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; + let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; + + let mut r = runner!(backprop); + let input_vals = HerculesImmBox::from(&input_vals as &[f32]); + let target = HerculesImmBox::from(&target as &[f32]); + let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); + let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); + + group.bench_function("backprop bench", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + input_n as u64, + hidden_n as u64, + output_n as u64, + input_vals.to(), + input_weights.to(), + hidden_weights.to(), + target.to(), + input_prev_weights.to(), + hidden_prev_weights.to(), + ) + .await + }); + }) + }); +} + +criterion_group!(benches, backprop_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/backprop/src/lib.rs b/juno_samples/rodinia/backprop/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..e2fc2ad5b9cd864c7a7c956bc1091eab350687b4 --- /dev/null +++ b/juno_samples/rodinia/backprop/src/lib.rs @@ -0,0 +1,156 @@ +#![feature(concat_idents)] + +juno_build::juno!("backprop"); + +mod rust_backprop; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +use clap::Parser; + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct BackpropInputs { + pub layer_size: usize, +} + +fn run_backprop( + input_n: u64, + hidden_n: u64, + output_n: u64, + input_vals: &[f32], + input_weights: &[f32], + hidden_weights: &[f32], + target: &[f32], + input_prev_weights: &[f32], + hidden_prev_weights: &[f32], +) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) { + let input_vals = HerculesImmBox::from(input_vals); + let target = HerculesImmBox::from(target); + + let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); + let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); + + let mut runner = runner!(backprop); + let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) = + async_std::task::block_on(async { + runner + .run( + input_n, + hidden_n, + output_n, + input_vals.to(), + input_weights.to(), + hidden_weights.to(), + target.to(), + input_prev_weights.to(), + hidden_prev_weights.to(), + ) + .await + }); + let mut input_weights = HerculesMutBox::from(input_weights); + let mut hidden_weights = HerculesMutBox::from(hidden_weights); + let mut input_prev_weights = HerculesMutBox::from(input_prev_weights); + let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights); + + ( + out_err, + hid_err, + input_weights.as_slice().to_vec(), + hidden_weights.as_slice().to_vec(), + input_prev_weights.as_slice().to_vec(), + hidden_prev_weights.as_slice().to_vec(), + ) +} + +fn compare_float(x: f32, y: f32) -> bool { + (x - y).abs() < 1e-5 +} + +fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +} + +pub fn backprop_harness(args: BackpropInputs) { + let BackpropInputs { layer_size } = args; + + let mut rng = StdRng::seed_from_u64(7); + + let input_n = layer_size; + let hidden_n = 16; + let output_n = 1; + + let mut input_vals = vec![0.0; input_n + 1]; + input_vals[0] = 1.0; + + // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 + let target = vec![0.1; output_n + 1]; + + let input_weights = (0..(input_n + 1) * (hidden_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) + .map(|_| rng.random::<f32>()) + .collect::<Vec<_>>(); + + let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; + let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; + + let ( + juno_out_err, + juno_hid_err, + juno_input_weights, + juno_hidden_weights, + juno_input_prev_weights, + juno_hidden_prev_weights, + ) = run_backprop( + input_n as u64, + hidden_n as u64, + output_n as u64, + &input_vals, + &input_weights, + &hidden_weights, + &target, + &input_prev_weights, + &hidden_prev_weights, + ); + + let ( + rust_out_err, + rust_hid_err, + rust_input_weights, + rust_hidden_weights, + rust_input_prev_weights, + rust_hidden_prev_weights, + ) = rust_backprop::backprop( + input_n, + hidden_n, + output_n, + &input_vals, + input_weights, + hidden_weights, + &target, + input_prev_weights, + hidden_prev_weights, + ); + + assert!(compare_float(juno_out_err, rust_out_err)); + assert!(compare_float(juno_hid_err, rust_hid_err)); + if !compare_floats(&juno_input_weights, &rust_input_weights) { + panic!("Input weights do not match after training"); + } + if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) { + panic!("Hidden weights do not match after training"); + } + if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) { + panic!("Input prev_weights do not match after training"); + } + if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) { + panic!("Hidden prev_weights do not match after training"); + } +} diff --git a/juno_samples/rodinia/backprop/src/main.rs b/juno_samples/rodinia/backprop/src/main.rs index fa80a7a51cba6581f3305398f5e3f91da05ad877..bb0d13a10946d0eee282fb3d5c4ff9b556b127fd 100644 --- a/juno_samples/rodinia/backprop/src/main.rs +++ b/juno_samples/rodinia/backprop/src/main.rs @@ -1,159 +1,6 @@ -#![feature(concat_idents)] - -juno_build::juno!("backprop"); - -mod rust_backprop; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -use rand::rngs::StdRng; -use rand::{Rng, SeedableRng}; - use clap::Parser; -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct BackpropInputs { - layer_size: usize, -} - -fn run_backprop( - input_n: u64, - hidden_n: u64, - output_n: u64, - input_vals: &[f32], - input_weights: &[f32], - hidden_weights: &[f32], - target: &[f32], - input_prev_weights: &[f32], - hidden_prev_weights: &[f32], -) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) { - let input_vals = HerculesImmBox::from(input_vals); - let target = HerculesImmBox::from(target); - - let mut input_weights = HerculesMutBox::from(input_weights.to_vec()); - let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec()); - let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec()); - let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec()); - - let mut runner = runner!(backprop); - let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) = - async_std::task::block_on(async { - runner - .run( - input_n, - hidden_n, - output_n, - input_vals.to(), - input_weights.to(), - hidden_weights.to(), - target.to(), - input_prev_weights.to(), - hidden_prev_weights.to(), - ) - .await - }); - let mut input_weights = HerculesMutBox::from(input_weights); - let mut hidden_weights = HerculesMutBox::from(hidden_weights); - let mut input_prev_weights = HerculesMutBox::from(input_prev_weights); - let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights); - - ( - out_err, - hid_err, - input_weights.as_slice().to_vec(), - hidden_weights.as_slice().to_vec(), - input_prev_weights.as_slice().to_vec(), - hidden_prev_weights.as_slice().to_vec(), - ) -} - -fn compare_float(x: f32, y: f32) -> bool { - (x - y).abs() < 1e-5 -} - -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) -} - -fn backprop_harness(args: BackpropInputs) { - let BackpropInputs { layer_size } = args; - - let mut rng = StdRng::seed_from_u64(7); - - let input_n = layer_size; - let hidden_n = 16; - let output_n = 1; - - let mut input_vals = vec![0.0; input_n + 1]; - input_vals[0] = 1.0; - - // For some reason the bpnn_randomize_row function used on target just sets it to 0.1 - let target = vec![0.1; output_n + 1]; - - let input_weights = (0..(input_n + 1) * (hidden_n + 1)) - .map(|_| rng.random::<f32>()) - .collect::<Vec<_>>(); - let hidden_weights = (0..(hidden_n + 1) * (output_n + 1)) - .map(|_| rng.random::<f32>()) - .collect::<Vec<_>>(); - - let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)]; - let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)]; - - let ( - juno_out_err, - juno_hid_err, - juno_input_weights, - juno_hidden_weights, - juno_input_prev_weights, - juno_hidden_prev_weights, - ) = run_backprop( - input_n as u64, - hidden_n as u64, - output_n as u64, - &input_vals, - &input_weights, - &hidden_weights, - &target, - &input_prev_weights, - &hidden_prev_weights, - ); - - let ( - rust_out_err, - rust_hid_err, - rust_input_weights, - rust_hidden_weights, - rust_input_prev_weights, - rust_hidden_prev_weights, - ) = rust_backprop::backprop( - input_n, - hidden_n, - output_n, - &input_vals, - input_weights, - hidden_weights, - &target, - input_prev_weights, - hidden_prev_weights, - ); - - assert!(compare_float(juno_out_err, rust_out_err)); - assert!(compare_float(juno_hid_err, rust_hid_err)); - if !compare_floats(&juno_input_weights, &rust_input_weights) { - panic!("Input weights do not match after training"); - } - if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) { - panic!("Hidden weights do not match after training"); - } - if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) { - panic!("Input prev_weights do not match after training"); - } - if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) { - panic!("Hidden prev_weights do not match after training"); - } -} +use juno_backprop::{backprop_harness, BackpropInputs}; fn main() { let args = BackpropInputs::parse(); diff --git a/juno_samples/rodinia/bfs/Cargo.toml b/juno_samples/rodinia/bfs/Cargo.toml index 2ae6c8c06ae5ebab70469eb0ea9b5df70ed99ba9..34b6f5cefccc5a7f175ef0389c0ec0310e17079a 100644 --- a/juno_samples/rodinia/bfs/Cargo.toml +++ b/juno_samples/rodinia/bfs/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_bfs" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "bfs_bench" +harness = false diff --git a/juno_samples/rodinia/bfs/benches/bfs_bench.rs b/juno_samples/rodinia/bfs/benches/bfs_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..bf39a0fce738dc5c100da3b6f0d85eaa2c9420bd --- /dev/null +++ b/juno_samples/rodinia/bfs/benches/bfs_bench.rs @@ -0,0 +1,41 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo}; + +juno_build::juno!("bfs"); + +use juno_bfs::graph_parser::*; + +fn bfs_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("bfs bench"); + + let mut r = runner!(bfs); + + let input = "data/graph4096.txt"; + let (nodes, source, edges) = parse_graph(input.into()); + let n = nodes.len() as u64; + let m = edges.len() as u64; + let nodes = HerculesImmBox::from(&nodes as &[Node]); + let edges = HerculesImmBox::from(&edges as &[u32]); + group.bench_function("bfs bench 4096", |b| { + b.iter(|| { + async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); + }) + }); + + let input = "data/graph65536.txt"; + let (nodes, source, edges) = parse_graph(input.into()); + let n = nodes.len() as u64; + let m = edges.len() as u64; + let nodes = HerculesImmBox::from(&nodes as &[Node]); + let edges = HerculesImmBox::from(&edges as &[u32]); + group.bench_function("bfs bench 65536", |b| { + b.iter(|| { + async_std::task::block_on(async { r.run(n, m, nodes.to(), source, edges.to()).await }); + }) + }); +} + +criterion_group!(benches, bfs_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/bfs/src/lib.rs b/juno_samples/rodinia/bfs/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..218e9bb0ffd73a2fb42b21a1fcc12fcc2cb6bb68 --- /dev/null +++ b/juno_samples/rodinia/bfs/src/lib.rs @@ -0,0 +1,44 @@ +#![feature(concat_idents)] +pub mod graph_parser; +mod rust_bfs; + +use graph_parser::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; + +use clap::Parser; + +juno_build::juno!("bfs"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct BFSInputs { + pub input: String, +} + +fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> { + let n = nodes.len() as u64; + let m = edges.len() as u64; + + let nodes = HerculesImmBox::from(nodes); + let edges = HerculesImmBox::from(edges); + + let mut runner = runner!(bfs); + + HerculesMutBox::from(async_std::task::block_on(async { + runner.run(n, m, nodes.to(), source, edges.to()).await + })) + .as_slice() + .to_vec() +} + +pub fn bfs_harness(args: BFSInputs) { + let BFSInputs { input } = args; + + let (nodes, source, edges) = parse_graph(input); + + let costs_juno = run_bfs(&nodes, source, &edges); + let costs_ref = rust_bfs::bfs(&nodes, source, &edges); + + assert_eq!(costs_juno, costs_ref); +} diff --git a/juno_samples/rodinia/bfs/src/main.rs b/juno_samples/rodinia/bfs/src/main.rs index 21e48c35e5dbd33875dccb5767d644d6ea2bca7e..0ad23b007c15a1ae477666aebc747727561837dd 100644 --- a/juno_samples/rodinia/bfs/src/main.rs +++ b/juno_samples/rodinia/bfs/src/main.rs @@ -1,47 +1,6 @@ -#![feature(concat_idents)] -mod graph_parser; -mod rust_bfs; - -use graph_parser::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox}; - use clap::Parser; -juno_build::juno!("bfs"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct BFSInputs { - input: String, -} - -fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> { - let n = nodes.len() as u64; - let m = edges.len() as u64; - - let nodes = HerculesImmBox::from(nodes); - let edges = HerculesImmBox::from(edges); - - let mut runner = runner!(bfs); - - HerculesMutBox::from(async_std::task::block_on(async { - runner.run(n, m, nodes.to(), source, edges.to()).await - })) - .as_slice() - .to_vec() -} - -fn bfs_harness(args: BFSInputs) { - let BFSInputs { input } = args; - - let (nodes, source, edges) = parse_graph(input); - - let costs_juno = run_bfs(&nodes, source, &edges); - let costs_ref = rust_bfs::bfs(&nodes, source, &edges); - - assert_eq!(costs_juno, costs_ref); -} +use juno_bfs::{bfs_harness, BFSInputs}; fn main() { let args = BFSInputs::parse(); diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml index 542ca7a23f8224ae3976e6b2d3c4e7ab7a25a453..6720b5275381594a63f31571ccf6266ebe4e46f4 100644 --- a/juno_samples/rodinia/cfd/Cargo.toml +++ b/juno_samples/rodinia/cfd/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_cfd" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "cfd_bench" +harness = false diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..fd614b42a55488bfcda64a853105fd40e53ff7bc --- /dev/null +++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs @@ -0,0 +1,130 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +use juno_cfd::*; + +fn cfd_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("cfd bench"); + + let mut r = runner!(euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); + + let mut r = runner!(pre_euler); + let data_file = "data/fvcorr.domn.097K".to_string(); + let iterations = 1; + let block_size = 16; + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + let mut variables = initialize_variables(nelr, ff_variable.as_slice()); + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas.as_slice()); + let elements_surrounding_elements = + HerculesImmBox::from(elements_surrounding_elements.as_slice()); + let normals = HerculesImmBox::from(normals.as_slice()); + let ff_variable = HerculesImmBox::from(ff_variable.as_slice()); + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + group.bench_function("cfd bench pre-euler", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + }); + }) + }); +} + +criterion_group!(benches, cfd_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..39384c0d6a322f22a7c7b7ef7e51348d80a36107 --- /dev/null +++ b/juno_samples/rodinia/cfd/src/lib.rs @@ -0,0 +1,236 @@ +#![feature(concat_idents)] +mod rust_cfd; +mod setup; + +use clap::Parser; + +pub use crate::setup::*; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("euler"); +juno_build::juno!("pre_euler"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct CFDInputs { + pub data_file: String, + pub iterations: usize, + pub block_size: usize, + #[clap(short = None, long = Some("pre-euler"))] + pub pre_euler: bool, +} + +fn run_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(euler); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables.to(), + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn run_pre_euler( + nelr: usize, + iterations: usize, + mut variables: AlignedSlice<f32>, + areas: &[f32], + elements_surrounding_elements: &[i32], + normals: &[f32], + ff_variable: &[f32], + ff_fc_density_energy: &Float3, + ff_fc_momentum_x: &Float3, + ff_fc_momentum_y: &Float3, + ff_fc_momentum_z: &Float3, +) -> Vec<f32> { + let mut variables = HerculesMutBox::from(variables.as_mut_slice()); + let areas = HerculesImmBox::from(areas); + let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); + let normals = HerculesImmBox::from(normals); + let ff_variable = HerculesImmBox::from(ff_variable); + + // TODO: Make hercules box handle structs, for now we'll copy into a vec + let ff_fc_density_energy = vec![ + ff_fc_density_energy.x, + ff_fc_density_energy.y, + ff_fc_density_energy.z, + ]; + let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); + let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; + let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); + let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; + let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); + let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; + let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); + + let mut runner = runner!(pre_euler); + + let variables = variables.to(); + + HerculesMutBox::from(async_std::task::block_on(async { + runner + .run( + nelr as u64, + iterations as u64, + variables, + areas.to(), + elements_surrounding_elements.to(), + normals.to(), + ff_variable.to(), + ff_fc_density_energy.to(), + ff_fc_momentum_x.to(), + ff_fc_momentum_y.to(), + ff_fc_momentum_z.to(), + ) + .await + })) + .as_slice() + .to_vec() +} + +fn compare_float(x: f32, y: f32) -> bool { + (x - y).abs() < 1e-5 +} + +fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) +} + +pub fn cfd_harness(args: CFDInputs) { + let CFDInputs { + data_file, + iterations, + block_size, + pre_euler, + } = args; + + assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); + + let FarFieldConditions { + ff_variable, + ff_fc_momentum_x, + ff_fc_momentum_y, + ff_fc_momentum_z, + ff_fc_density_energy, + } = set_far_field_conditions(); + + let GeometryData { + nelr, + areas, + elements_surrounding_elements, + normals, + } = read_domain_geometry(data_file, block_size); + + let variables = initialize_variables(nelr, ff_variable.as_slice()); + + let res_juno = if pre_euler { + run_pre_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + run_euler( + nelr, + iterations, + variables.clone(), + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + let res_rust = if pre_euler { + rust_cfd::pre_euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + } else { + rust_cfd::euler( + nelr, + iterations, + variables, + areas.as_slice(), + elements_surrounding_elements.as_slice(), + normals.as_slice(), + ff_variable.as_slice(), + &ff_fc_density_energy, + &ff_fc_momentum_x, + &ff_fc_momentum_y, + &ff_fc_momentum_z, + ) + }; + + if !compare_floats(&res_juno, res_rust.as_slice()) { + assert_eq!(res_juno.len(), res_rust.as_slice().len()); + panic!("Mismatch in results"); + } +} diff --git a/juno_samples/rodinia/cfd/src/main.rs b/juno_samples/rodinia/cfd/src/main.rs index fab241fa6ead74d7ab52c774a6a6b0c6984426cc..277a3edb702cb29e835220cd891e5df957eb92d5 100644 --- a/juno_samples/rodinia/cfd/src/main.rs +++ b/juno_samples/rodinia/cfd/src/main.rs @@ -1,239 +1,6 @@ -#![feature(concat_idents)] -mod rust_cfd; -mod setup; - use clap::Parser; -use crate::setup::*; - -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -juno_build::juno!("euler"); -juno_build::juno!("pre_euler"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct CFDInputs { - data_file: String, - iterations: usize, - block_size: usize, - #[clap(short = None, long = Some("pre-euler"))] - pre_euler: bool, -} - -fn run_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(euler); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables.to(), - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn run_pre_euler( - nelr: usize, - iterations: usize, - mut variables: AlignedSlice<f32>, - areas: &[f32], - elements_surrounding_elements: &[i32], - normals: &[f32], - ff_variable: &[f32], - ff_fc_density_energy: &Float3, - ff_fc_momentum_x: &Float3, - ff_fc_momentum_y: &Float3, - ff_fc_momentum_z: &Float3, -) -> Vec<f32> { - let mut variables = HerculesMutBox::from(variables.as_mut_slice()); - let areas = HerculesImmBox::from(areas); - let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements); - let normals = HerculesImmBox::from(normals); - let ff_variable = HerculesImmBox::from(ff_variable); - - // TODO: Make hercules box handle structs, for now we'll copy into a vec - let ff_fc_density_energy = vec![ - ff_fc_density_energy.x, - ff_fc_density_energy.y, - ff_fc_density_energy.z, - ]; - let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice()); - let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z]; - let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice()); - let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z]; - let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice()); - let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z]; - let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice()); - - let mut runner = runner!(pre_euler); - - let variables = variables.to(); - - HerculesMutBox::from(async_std::task::block_on(async { - runner - .run( - nelr as u64, - iterations as u64, - variables, - areas.to(), - elements_surrounding_elements.to(), - normals.to(), - ff_variable.to(), - ff_fc_density_energy.to(), - ff_fc_momentum_x.to(), - ff_fc_momentum_y.to(), - ff_fc_momentum_z.to(), - ) - .await - })) - .as_slice() - .to_vec() -} - -fn compare_float(x: f32, y: f32) -> bool { - (x - y).abs() < 1e-5 -} - -fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { - xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y)) -} - -fn cfd_harness(args: CFDInputs) { - let CFDInputs { - data_file, - iterations, - block_size, - pre_euler, - } = args; - - assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16"); - - let FarFieldConditions { - ff_variable, - ff_fc_momentum_x, - ff_fc_momentum_y, - ff_fc_momentum_z, - ff_fc_density_energy, - } = set_far_field_conditions(); - - let GeometryData { - nelr, - areas, - elements_surrounding_elements, - normals, - } = read_domain_geometry(data_file, block_size); - - let variables = initialize_variables(nelr, ff_variable.as_slice()); - - let res_juno = if pre_euler { - run_pre_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - run_euler( - nelr, - iterations, - variables.clone(), - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - let res_rust = if pre_euler { - rust_cfd::pre_euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - } else { - rust_cfd::euler( - nelr, - iterations, - variables, - areas.as_slice(), - elements_surrounding_elements.as_slice(), - normals.as_slice(), - ff_variable.as_slice(), - &ff_fc_density_energy, - &ff_fc_momentum_x, - &ff_fc_momentum_y, - &ff_fc_momentum_z, - ) - }; - - if !compare_floats(&res_juno, res_rust.as_slice()) { - assert_eq!(res_juno.len(), res_rust.as_slice().len()); - panic!("Mismatch in results"); - } -} +use juno_cfd::{cfd_harness, CFDInputs}; fn main() { let args = CFDInputs::parse(); diff --git a/juno_samples/rodinia/srad/Cargo.toml b/juno_samples/rodinia/srad/Cargo.toml index e41a8871d3694045d42206028d56c429698e41fc..facf8c3bc7c92fe0b77dd85900c3e53307d358e5 100644 --- a/juno_samples/rodinia/srad/Cargo.toml +++ b/juno_samples/rodinia/srad/Cargo.toml @@ -8,6 +8,9 @@ edition = "2021" name = "juno_srad" path = "src/main.rs" +[lib] +path = "src/lib.rs" + [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] @@ -21,3 +24,10 @@ async-std = "*" clap = { version = "*", features = ["derive"] } with_builtin_macros = "0.1.0" nom = "*" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "srad_bench" +harness = false diff --git a/juno_samples/rodinia/srad/benches/srad_bench.rs b/juno_samples/rodinia/srad/benches/srad_bench.rs new file mode 100644 index 0000000000000000000000000000000000000000..d327454002a6f9cabe4c40f74098570ea0d22d66 --- /dev/null +++ b/juno_samples/rodinia/srad/benches/srad_bench.rs @@ -0,0 +1,62 @@ +#![feature(concat_idents)] +use criterion::{criterion_group, criterion_main, Criterion}; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("srad"); + +use juno_srad::*; + +fn srad_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("srad bench"); + + let mut r = runner!(srad); + let niter = 100; + let lambda = 0.5; + let nrows = 502; + let ncols = 458; + let image = "data/image.pgm".to_string(); + let Image { + image: image_ori, + max, + rows: image_ori_rows, + cols: image_ori_cols, + } = read_graphics(image); + let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); + let mut image_h = HerculesMutBox::from(image.clone()); + let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); + let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); + let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); + let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); + // Fix boundary conditions + iN[0] = 0; + iS[nrows - 1] = (nrows - 1) as i32; + jW[0] = 0; + jE[ncols - 1] = (ncols - 1) as i32; + let iN_h = HerculesImmBox::from(iN.as_slice()); + let iS_h = HerculesImmBox::from(iS.as_slice()); + let jW_h = HerculesImmBox::from(jW.as_slice()); + let jE_h = HerculesImmBox::from(jE.as_slice()); + group.bench_function("srad bench", |b| { + b.iter(|| { + async_std::task::block_on(async { + r.run( + nrows as u64, + ncols as u64, + niter as u64, + image_h.to(), + iN_h.to(), + iS_h.to(), + jW_h.to(), + jE_h.to(), + max, + lambda, + ) + .await + }); + }) + }); +} + +criterion_group!(benches, srad_bench); +criterion_main!(benches); diff --git a/juno_samples/rodinia/srad/src/lib.rs b/juno_samples/rodinia/srad/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..d63660070ff0f61d47057ea00b14b3fb31db6e09 --- /dev/null +++ b/juno_samples/rodinia/srad/src/lib.rs @@ -0,0 +1,123 @@ +#![feature(concat_idents)] +mod graphics; +mod rust_srad; + +pub use graphics::*; + +use clap::Parser; + +use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; + +juno_build::juno!("srad"); + +#[derive(Parser)] +#[clap(author, version, about, long_about = None)] +pub struct SRADInputs { + pub niter: usize, + pub lambda: f32, + pub nrows: usize, + pub ncols: usize, + pub image: String, + #[clap(short, long)] + pub output: Option<String>, + #[clap(short, long)] + pub verify: bool, + #[clap(long = "output-verify", value_name = "PATH")] + pub output_verify: Option<String>, +} + +pub fn srad_harness(args: SRADInputs) { + async_std::task::block_on(async { + let SRADInputs { + niter, + lambda, + nrows, + ncols, + image, + output, + verify, + output_verify, + } = args; + + let Image { + image: image_ori, + max, + rows: image_ori_rows, + cols: image_ori_cols, + } = read_graphics(image); + let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); + let mut image_h = HerculesMutBox::from(image.clone()); + + let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); + let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); + let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); + let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); + + // Fix boundary conditions + iN[0] = 0; + iS[nrows - 1] = (nrows - 1) as i32; + jW[0] = 0; + jE[ncols - 1] = (ncols - 1) as i32; + + let iN_h = HerculesImmBox::from(iN.as_slice()); + let iS_h = HerculesImmBox::from(iS.as_slice()); + let jW_h = HerculesImmBox::from(jW.as_slice()); + let jE_h = HerculesImmBox::from(jE.as_slice()); + + let mut runner = runner!(srad); + let result: Vec<f32> = HerculesMutBox::from( + runner + .run( + nrows as u64, + ncols as u64, + niter as u64, + image_h.to(), + iN_h.to(), + iS_h.to(), + jW_h.to(), + jE_h.to(), + max, + lambda, + ) + .await, + ) + .as_slice() + .to_vec(); + + if let Some(output) = output { + write_graphics(output, &result, nrows, ncols, max); + } + + if verify { + let mut rust_result = image; + rust_srad::srad( + nrows, + ncols, + niter, + &mut rust_result, + &iN, + &iS, + &jW, + &jE, + max, + lambda, + ); + + if let Some(output) = output_verify { + write_graphics(output, &rust_result, nrows, ncols, max); + } + + let max_diff = result + .iter() + .zip(rust_result.iter()) + .map(|(a, b)| (*a as i32 - *b as i32).abs()) + .max() + .unwrap_or(0); + assert!( + max_diff <= 1, + "Verification failed: maximum pixel difference of {} exceeds threshold of 1", + max_diff + ); + } + }) +} diff --git a/juno_samples/rodinia/srad/src/main.rs b/juno_samples/rodinia/srad/src/main.rs index 1b99b41aada5341fb3157cfb0d97be3a99e73796..87d1e7e8504584478f51ac2b9dc20dbc04716c81 100644 --- a/juno_samples/rodinia/srad/src/main.rs +++ b/juno_samples/rodinia/srad/src/main.rs @@ -1,126 +1,6 @@ -#![feature(concat_idents)] -mod graphics; -mod rust_srad; - -use graphics::*; - use clap::Parser; -use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo}; - -juno_build::juno!("srad"); - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -struct SRADInputs { - niter: usize, - lambda: f32, - nrows: usize, - ncols: usize, - image: String, - #[clap(short, long)] - output: Option<String>, - #[clap(short, long)] - verify: bool, - #[clap(long = "output-verify", value_name = "PATH")] - output_verify: Option<String>, -} - -fn srad_harness(args: SRADInputs) { - async_std::task::block_on(async { - let SRADInputs { - niter, - lambda, - nrows, - ncols, - image, - output, - verify, - output_verify, - } = args; - - let Image { - image: image_ori, - max, - rows: image_ori_rows, - cols: image_ori_cols, - } = read_graphics(image); - let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols); - let mut image_h = HerculesMutBox::from(image.clone()); - - let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); - let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); - let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); - let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); - - // Fix boundary conditions - iN[0] = 0; - iS[nrows - 1] = (nrows - 1) as i32; - jW[0] = 0; - jE[ncols - 1] = (ncols - 1) as i32; - - let iN_h = HerculesImmBox::from(iN.as_slice()); - let iS_h = HerculesImmBox::from(iS.as_slice()); - let jW_h = HerculesImmBox::from(jW.as_slice()); - let jE_h = HerculesImmBox::from(jE.as_slice()); - - let mut runner = runner!(srad); - let result: Vec<f32> = HerculesMutBox::from( - runner - .run( - nrows as u64, - ncols as u64, - niter as u64, - image_h.to(), - iN_h.to(), - iS_h.to(), - jW_h.to(), - jE_h.to(), - max, - lambda, - ) - .await, - ) - .as_slice() - .to_vec(); - - if let Some(output) = output { - write_graphics(output, &result, nrows, ncols, max); - } - - if verify { - let mut rust_result = image; - rust_srad::srad( - nrows, - ncols, - niter, - &mut rust_result, - &iN, - &iS, - &jW, - &jE, - max, - lambda, - ); - - if let Some(output) = output_verify { - write_graphics(output, &rust_result, nrows, ncols, max); - } - - let max_diff = result - .iter() - .zip(rust_result.iter()) - .map(|(a, b)| (*a as i32 - *b as i32).abs()) - .max() - .unwrap_or(0); - assert!( - max_diff <= 1, - "Verification failed: maximum pixel difference of {} exceeds threshold of 1", - max_diff - ); - } - }) -} +use juno_srad::*; fn main() { let args = SRADInputs::parse();