From 65ca4faf0b676522dc27690d481aec4c82861f52 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 17:20:07 -0600
Subject: [PATCH 1/5] bench for backprop but weird linking error

---
 Cargo.lock                                    |   1 +
 juno_samples/rodinia/backprop/Cargo.toml      |  10 ++
 .../backprop/benches/backprop_bench.rs        |  65 ++++++++
 juno_samples/rodinia/backprop/src/lib.rs      | 156 ++++++++++++++++++
 juno_samples/rodinia/backprop/src/main.rs     | 155 +----------------
 5 files changed, 233 insertions(+), 154 deletions(-)
 create mode 100644 juno_samples/rodinia/backprop/benches/backprop_bench.rs
 create mode 100644 juno_samples/rodinia/backprop/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index c872be3a..5916a17c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1238,6 +1238,7 @@ version = "0.1.0"
 dependencies = [
  "async-std",
  "clap",
+ "criterion",
  "hercules_rt",
  "juno_build",
  "nom 8.0.0",
diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml
index 729b3969..25185e09 100644
--- a/juno_samples/rodinia/backprop/Cargo.toml
+++ b/juno_samples/rodinia/backprop/Cargo.toml
@@ -8,6 +8,9 @@ edition = "2021"
 name = "juno_backprop"
 path = "src/main.rs"
 
+[lib]
+path = "src/lib.rs"
+
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
 
@@ -22,3 +25,10 @@ clap = { version = "*", features = ["derive"] }
 with_builtin_macros = "0.1.0"
 nom = "*"
 rand = "0.9.0"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "backprop_bench"
+harness = false
\ No newline at end of file
diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
new file mode 100644
index 00000000..40cad98f
--- /dev/null
+++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
@@ -0,0 +1,65 @@
+#![feature(concat_idents)]
+use criterion::{criterion_group, criterion_main, Criterion};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("backprop");
+
+fn backprop_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("backprop bench");
+    group.sample_size(10);
+    // Inputs are built once here; only `r.run` below sits inside the timed closure.
+    let mut rng = StdRng::seed_from_u64(7);
+
+    let input_n = 65536;
+    let hidden_n = 16;
+    let output_n = 1;
+
+    let mut input_vals = vec![0.0f32; input_n + 1];
+    input_vals[0] = 1.0;
+
+    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
+    let target = vec![0.1f32; output_n + 1];
+
+    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+    // The f32 suffix keeps these buffers from falling back to f64 via literal inference.
+    let input_prev_weights = vec![0.0f32; (input_n + 1) * (hidden_n + 1)];
+    let hidden_prev_weights = vec![0.0f32; (hidden_n + 1) * (output_n + 1)];
+    // Build the runner and box its inputs; the owned vectors are moved in, not cloned.
+    let mut r = runner!(backprop);
+    let input_vals = HerculesImmBox::from(&input_vals as &[f32]);
+    let target = HerculesImmBox::from(&target as &[f32]);
+    let mut input_weights = HerculesMutBox::from(input_weights);
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
+
+    group.bench_function("backprop bench", |b| {
+        b.iter(|| {
+            async_std::task::block_on(async {
+                r.run(
+                    input_n as u64,
+                    hidden_n as u64,
+                    output_n as u64,
+                    input_vals.to(),
+                    input_weights.to(),
+                    hidden_weights.to(),
+                    target.to(),
+                    input_prev_weights.to(),
+                    hidden_prev_weights.to(),
+                )
+                .await
+            });
+        })
+    });
+}
+
+criterion_group!(benches, backprop_bench);
+criterion_main!(benches);
diff --git a/juno_samples/rodinia/backprop/src/lib.rs b/juno_samples/rodinia/backprop/src/lib.rs
new file mode 100644
index 00000000..e2fc2ad5
--- /dev/null
+++ b/juno_samples/rodinia/backprop/src/lib.rs
@@ -0,0 +1,156 @@
+#![feature(concat_idents)]
+
+juno_build::juno!("backprop");
+
+mod rust_backprop;
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use clap::Parser;
+
+#[derive(Parser)] // clap derives the command-line parser for this struct
+#[clap(author, version, about, long_about = None)]
+pub struct BackpropInputs {
+    pub layer_size: usize, // used by backprop_harness as the input layer size (input_n)
+}
+
+fn run_backprop(
+    input_n: u64,
+    hidden_n: u64,
+    output_n: u64,
+    input_vals: &[f32],
+    input_weights: &[f32],
+    hidden_weights: &[f32],
+    target: &[f32],
+    input_prev_weights: &[f32],
+    hidden_prev_weights: &[f32],
+) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
+    let input_vals = HerculesImmBox::from(input_vals);
+    let target = HerculesImmBox::from(target);
+    // The weight slices are copied into owned vectors (`to_vec`) before being boxed mutably.
+    let mut input_weights = HerculesMutBox::from(input_weights.to_vec());
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec());
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec());
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec());
+    // Run the Juno `backprop` entry point, blocking this thread until it completes.
+    let mut runner = runner!(backprop);
+    let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) =
+        async_std::task::block_on(async {
+            runner
+                .run(
+                    input_n,
+                    hidden_n,
+                    output_n,
+                    input_vals.to(),
+                    input_weights.to(),
+                    hidden_weights.to(),
+                    target.to(),
+                    input_prev_weights.to(),
+                    hidden_prev_weights.to(),
+                )
+                .await
+        });
+    let mut input_weights = HerculesMutBox::from(input_weights);
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
+    // Returned order is weights first, then prev-weights; the destructuring above interleaves them.
+    (
+        out_err,
+        hid_err,
+        input_weights.as_slice().to_vec(),
+        hidden_weights.as_slice().to_vec(),
+        input_prev_weights.as_slice().to_vec(),
+        hidden_prev_weights.as_slice().to_vec(),
+    )
+}
+
+fn compare_float(x: f32, y: f32) -> bool {
+    (x - y).abs() < 1e-5 // absolute tolerance; false if either value is NaN
+}
+
+fn compare_floats(xs: &[f32], ys: &[f32]) -> bool { // same length and every pair close
+    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
+}
+
+pub fn backprop_harness(args: BackpropInputs) {
+    let BackpropInputs { layer_size } = args;
+    // Fixed seed, so the generated weights are the same on every run.
+    let mut rng = StdRng::seed_from_u64(7);
+
+    let input_n = layer_size;
+    let hidden_n = 16;
+    let output_n = 1;
+
+    let mut input_vals = vec![0.0; input_n + 1];
+    input_vals[0] = 1.0;
+
+    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
+    let target = vec![0.1; output_n + 1];
+
+    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+
+    let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)];
+    let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)];
+    // Juno implementation: borrows the inputs, leaving them available for the Rust run below.
+    let (
+        juno_out_err,
+        juno_hid_err,
+        juno_input_weights,
+        juno_hidden_weights,
+        juno_input_prev_weights,
+        juno_hidden_prev_weights,
+    ) = run_backprop(
+        input_n as u64,
+        hidden_n as u64,
+        output_n as u64,
+        &input_vals,
+        &input_weights,
+        &hidden_weights,
+        &target,
+        &input_prev_weights,
+        &hidden_prev_weights,
+    );
+    // Rust implementation: takes the weight vectors by value, so it runs last.
+    let (
+        rust_out_err,
+        rust_hid_err,
+        rust_input_weights,
+        rust_hidden_weights,
+        rust_input_prev_weights,
+        rust_hidden_prev_weights,
+    ) = rust_backprop::backprop(
+        input_n,
+        hidden_n,
+        output_n,
+        &input_vals,
+        input_weights,
+        hidden_weights,
+        &target,
+        input_prev_weights,
+        hidden_prev_weights,
+    );
+    // Any mismatch between the two implementations panics.
+    assert!(compare_float(juno_out_err, rust_out_err));
+    assert!(compare_float(juno_hid_err, rust_hid_err));
+    if !compare_floats(&juno_input_weights, &rust_input_weights) {
+        panic!("Input weights do not match after training");
+    }
+    if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) {
+        panic!("Hidden weights do not match after training");
+    }
+    if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) {
+        panic!("Input prev_weights do not match after training");
+    }
+    if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) {
+        panic!("Hidden prev_weights do not match after training");
+    }
+}
diff --git a/juno_samples/rodinia/backprop/src/main.rs b/juno_samples/rodinia/backprop/src/main.rs
index fa80a7a5..bb0d13a1 100644
--- a/juno_samples/rodinia/backprop/src/main.rs
+++ b/juno_samples/rodinia/backprop/src/main.rs
@@ -1,159 +1,6 @@
-#![feature(concat_idents)]
-
-juno_build::juno!("backprop");
-
-mod rust_backprop;
-
-use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
-
-use rand::rngs::StdRng;
-use rand::{Rng, SeedableRng};
-
 use clap::Parser;
 
-#[derive(Parser)]
-#[clap(author, version, about, long_about = None)]
-struct BackpropInputs {
-    layer_size: usize,
-}
-
-fn run_backprop(
-    input_n: u64,
-    hidden_n: u64,
-    output_n: u64,
-    input_vals: &[f32],
-    input_weights: &[f32],
-    hidden_weights: &[f32],
-    target: &[f32],
-    input_prev_weights: &[f32],
-    hidden_prev_weights: &[f32],
-) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
-    let input_vals = HerculesImmBox::from(input_vals);
-    let target = HerculesImmBox::from(target);
-
-    let mut input_weights = HerculesMutBox::from(input_weights.to_vec());
-    let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec());
-    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec());
-    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec());
-
-    let mut runner = runner!(backprop);
-    let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) =
-        async_std::task::block_on(async {
-            runner
-                .run(
-                    input_n,
-                    hidden_n,
-                    output_n,
-                    input_vals.to(),
-                    input_weights.to(),
-                    hidden_weights.to(),
-                    target.to(),
-                    input_prev_weights.to(),
-                    hidden_prev_weights.to(),
-                )
-                .await
-        });
-    let mut input_weights = HerculesMutBox::from(input_weights);
-    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
-    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
-    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
-
-    (
-        out_err,
-        hid_err,
-        input_weights.as_slice().to_vec(),
-        hidden_weights.as_slice().to_vec(),
-        input_prev_weights.as_slice().to_vec(),
-        hidden_prev_weights.as_slice().to_vec(),
-    )
-}
-
-fn compare_float(x: f32, y: f32) -> bool {
-    (x - y).abs() < 1e-5
-}
-
-fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
-    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
-}
-
-fn backprop_harness(args: BackpropInputs) {
-    let BackpropInputs { layer_size } = args;
-
-    let mut rng = StdRng::seed_from_u64(7);
-
-    let input_n = layer_size;
-    let hidden_n = 16;
-    let output_n = 1;
-
-    let mut input_vals = vec![0.0; input_n + 1];
-    input_vals[0] = 1.0;
-
-    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
-    let target = vec![0.1; output_n + 1];
-
-    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
-        .map(|_| rng.random::<f32>())
-        .collect::<Vec<_>>();
-    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
-        .map(|_| rng.random::<f32>())
-        .collect::<Vec<_>>();
-
-    let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)];
-    let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)];
-
-    let (
-        juno_out_err,
-        juno_hid_err,
-        juno_input_weights,
-        juno_hidden_weights,
-        juno_input_prev_weights,
-        juno_hidden_prev_weights,
-    ) = run_backprop(
-        input_n as u64,
-        hidden_n as u64,
-        output_n as u64,
-        &input_vals,
-        &input_weights,
-        &hidden_weights,
-        &target,
-        &input_prev_weights,
-        &hidden_prev_weights,
-    );
-
-    let (
-        rust_out_err,
-        rust_hid_err,
-        rust_input_weights,
-        rust_hidden_weights,
-        rust_input_prev_weights,
-        rust_hidden_prev_weights,
-    ) = rust_backprop::backprop(
-        input_n,
-        hidden_n,
-        output_n,
-        &input_vals,
-        input_weights,
-        hidden_weights,
-        &target,
-        input_prev_weights,
-        hidden_prev_weights,
-    );
-
-    assert!(compare_float(juno_out_err, rust_out_err));
-    assert!(compare_float(juno_hid_err, rust_hid_err));
-    if !compare_floats(&juno_input_weights, &rust_input_weights) {
-        panic!("Input weights do not match after training");
-    }
-    if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) {
-        panic!("Hidden weights do not match after training");
-    }
-    if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) {
-        panic!("Input prev_weights do not match after training");
-    }
-    if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) {
-        panic!("Hidden prev_weights do not match after training");
-    }
-}
+use juno_backprop::{backprop_harness, BackpropInputs};
 
 fn main() {
     let args = BackpropInputs::parse();
-- 
GitLab


From cbd5e70e22944f444da8cb1bebf310eea9c13622 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 17:46:04 -0600
Subject: [PATCH 2/5] backprop bench: import juno_backprop to fix linking error

---
 juno_samples/rodinia/backprop/benches/backprop_bench.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
index 40cad98f..17bdf6a7 100644
--- a/juno_samples/rodinia/backprop/benches/backprop_bench.rs
+++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
@@ -7,6 +7,11 @@ use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, Herc
 
 juno_build::juno!("backprop");
 
+// This import is required even though nothing from the library is used here:
+// Rust build scripts link static libraries only into the library target, not
+// into the benchmark binary, so the benchmark fails to link without it.
+use juno_backprop::*;
+
 fn backprop_bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("backprop bench");
     group.sample_size(10);
-- 
GitLab


From b4e9f25846d161122e147083508dde02dd975752 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 17:56:13 -0600
Subject: [PATCH 3/5] bfs bench

---
 Cargo.lock                                    |  1 +
 juno_samples/rodinia/bfs/Cargo.toml           | 10 +++++
 juno_samples/rodinia/bfs/benches/bfs_bench.rs | 41 +++++++++++++++++
 juno_samples/rodinia/bfs/src/lib.rs           | 44 +++++++++++++++++++
 juno_samples/rodinia/bfs/src/main.rs          | 43 +-----------------
 5 files changed, 97 insertions(+), 42 deletions(-)
 create mode 100644 juno_samples/rodinia/bfs/benches/bfs_bench.rs
 create mode 100644 juno_samples/rodinia/bfs/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 5916a17c..81394ef6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1252,6 +1252,7 @@ version = "0.1.0"
 dependencies = [
  "async-std",
  "clap",
+ "criterion",
  "hercules_rt",
  "juno_build",
  "nom 8.0.0",
diff --git a/juno_samples/rodinia/bfs/Cargo.toml b/juno_samples/rodinia/bfs/Cargo.toml
index 2ae6c8c0..34b6f5ce 100644
--- a/juno_samples/rodinia/bfs/Cargo.toml
+++ b/juno_samples/rodinia/bfs/Cargo.toml
@@ -8,6 +8,9 @@ edition = "2021"
 name = "juno_bfs"
 path = "src/main.rs"
 
+[lib]
+path = "src/lib.rs"
+
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
 
@@ -21,3 +24,10 @@ async-std = "*"
 clap = { version = "*", features = ["derive"] }
 with_builtin_macros = "0.1.0"
 nom = "*"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "bfs_bench"
+harness = false
diff --git a/juno_samples/rodinia/bfs/benches/bfs_bench.rs b/juno_samples/rodinia/bfs/benches/bfs_bench.rs
new file mode 100644
index 00000000..bf39a0fc
--- /dev/null
+++ b/juno_samples/rodinia/bfs/benches/bfs_bench.rs
@@ -0,0 +1,41 @@
+#![feature(concat_idents)]
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo};
+
+juno_build::juno!("bfs");
+
+use juno_bfs::graph_parser::*;
+
+fn bfs_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bfs bench");
+
+    let mut r = runner!(bfs);
+
+    // Each entry pairs a graph file with the name its benchmark is reported under.
+    let cases = [
+        ("data/graph4096.txt", "bfs bench 4096"),
+        ("data/graph65536.txt", "bfs bench 65536"),
+    ];
+    for (input, name) in cases {
+        // Parsing and boxing happen here, outside the timed closure passed to `b.iter`.
+        let (nodes, source, edges) = parse_graph(input.into());
+        let n = nodes.len() as u64;
+        let m = edges.len() as u64;
+        let nodes = HerculesImmBox::from(&nodes as &[Node]);
+        let edges = HerculesImmBox::from(&edges as &[u32]);
+        group.bench_function(name, |b| {
+            b.iter(|| {
+                async_std::task::block_on(async {
+                    r.run(n, m, nodes.to(), source, edges.to()).await
+                });
+            })
+        });
+    }
+
+    // Finish the group explicitly instead of relying on its drop at the end of scope.
+    group.finish();
+}
+
+criterion_group!(benches, bfs_bench);
+criterion_main!(benches);
diff --git a/juno_samples/rodinia/bfs/src/lib.rs b/juno_samples/rodinia/bfs/src/lib.rs
new file mode 100644
index 00000000..218e9bb0
--- /dev/null
+++ b/juno_samples/rodinia/bfs/src/lib.rs
@@ -0,0 +1,44 @@
+#![feature(concat_idents)]
+pub mod graph_parser;
+mod rust_bfs;
+
+use graph_parser::*;
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
+
+use clap::Parser;
+
+juno_build::juno!("bfs");
+
+#[derive(Parser)] // clap derives the command-line parser for this struct
+#[clap(author, version, about, long_about = None)]
+pub struct BFSInputs {
+    pub input: String, // passed to parse_graph by bfs_harness
+}
+
+fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> {
+    let n = nodes.len() as u64;
+    let m = edges.len() as u64;
+    // Box the borrowed node and edge slices for the runner.
+    let nodes = HerculesImmBox::from(nodes);
+    let edges = HerculesImmBox::from(edges);
+
+    let mut runner = runner!(bfs);
+    // Block on the run, then copy its result out into an owned Vec<i32>.
+    HerculesMutBox::from(async_std::task::block_on(async {
+        runner.run(n, m, nodes.to(), source, edges.to()).await
+    }))
+    .as_slice()
+    .to_vec()
+}
+
+pub fn bfs_harness(args: BFSInputs) {
+    let BFSInputs { input } = args;
+
+    let (nodes, source, edges) = parse_graph(input);
+    // Compute the costs with the Juno kernel and with rust_bfs::bfs on the same graph.
+    let costs_juno = run_bfs(&nodes, source, &edges);
+    let costs_ref = rust_bfs::bfs(&nodes, source, &edges);
+    // Panics if the two cost vectors differ.
+    assert_eq!(costs_juno, costs_ref);
+}
diff --git a/juno_samples/rodinia/bfs/src/main.rs b/juno_samples/rodinia/bfs/src/main.rs
index 21e48c35..0ad23b00 100644
--- a/juno_samples/rodinia/bfs/src/main.rs
+++ b/juno_samples/rodinia/bfs/src/main.rs
@@ -1,47 +1,6 @@
-#![feature(concat_idents)]
-mod graph_parser;
-mod rust_bfs;
-
-use graph_parser::*;
-
-use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
-
 use clap::Parser;
 
-juno_build::juno!("bfs");
-
-#[derive(Parser)]
-#[clap(author, version, about, long_about = None)]
-struct BFSInputs {
-    input: String,
-}
-
-fn run_bfs(nodes: &[Node], source: u32, edges: &[u32]) -> Vec<i32> {
-    let n = nodes.len() as u64;
-    let m = edges.len() as u64;
-
-    let nodes = HerculesImmBox::from(nodes);
-    let edges = HerculesImmBox::from(edges);
-
-    let mut runner = runner!(bfs);
-
-    HerculesMutBox::from(async_std::task::block_on(async {
-        runner.run(n, m, nodes.to(), source, edges.to()).await
-    }))
-    .as_slice()
-    .to_vec()
-}
-
-fn bfs_harness(args: BFSInputs) {
-    let BFSInputs { input } = args;
-
-    let (nodes, source, edges) = parse_graph(input);
-
-    let costs_juno = run_bfs(&nodes, source, &edges);
-    let costs_ref = rust_bfs::bfs(&nodes, source, &edges);
-
-    assert_eq!(costs_juno, costs_ref);
-}
+use juno_bfs::{bfs_harness, BFSInputs};
 
 fn main() {
     let args = BFSInputs::parse();
-- 
GitLab


From 5f96afc726c24c14f6b612a74b01fa47b45f1ab5 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 21:20:33 -0600
Subject: [PATCH 4/5] cfd bench

---
 Cargo.lock                                    |   1 +
 juno_samples/rodinia/cfd/Cargo.toml           |  10 +
 juno_samples/rodinia/cfd/benches/cfd_bench.rs | 130 ++++++++++
 juno_samples/rodinia/cfd/src/lib.rs           | 236 ++++++++++++++++++
 juno_samples/rodinia/cfd/src/main.rs          | 235 +----------------
 5 files changed, 378 insertions(+), 234 deletions(-)
 create mode 100644 juno_samples/rodinia/cfd/benches/cfd_bench.rs
 create mode 100644 juno_samples/rodinia/cfd/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 81394ef6..170a7f3e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1297,6 +1297,7 @@ version = "0.1.0"
 dependencies = [
  "async-std",
  "clap",
+ "criterion",
  "hercules_rt",
  "juno_build",
  "nom 8.0.0",
diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml
index 542ca7a2..6720b527 100644
--- a/juno_samples/rodinia/cfd/Cargo.toml
+++ b/juno_samples/rodinia/cfd/Cargo.toml
@@ -8,6 +8,9 @@ edition = "2021"
 name = "juno_cfd"
 path = "src/main.rs"
 
+[lib]
+path = "src/lib.rs"
+
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
 
@@ -21,3 +24,10 @@ async-std = "*"
 clap = { version = "*", features = ["derive"] }
 with_builtin_macros = "0.1.0"
 nom = "*"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "cfd_bench"
+harness = false
diff --git a/juno_samples/rodinia/cfd/benches/cfd_bench.rs b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
new file mode 100644
index 00000000..fd614b42
--- /dev/null
+++ b/juno_samples/rodinia/cfd/benches/cfd_bench.rs
@@ -0,0 +1,130 @@
+#![feature(concat_idents)]
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("euler");
+juno_build::juno!("pre_euler");
+
+use juno_cfd::*;
+
+fn cfd_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("cfd bench");
+
+    let mut r = runner!(euler);
+    let data_file = "data/fvcorr.domn.097K".to_string();
+    let iterations = 1; // forwarded to r.run below
+    let block_size = 16; // forwarded to read_domain_geometry below
+    let FarFieldConditions {
+        ff_variable,
+        ff_fc_momentum_x,
+        ff_fc_momentum_y,
+        ff_fc_momentum_z,
+        ff_fc_density_energy,
+    } = set_far_field_conditions();
+    let GeometryData {
+        nelr,
+        areas,
+        elements_surrounding_elements,
+        normals,
+    } = read_domain_geometry(data_file, block_size);
+    let mut variables = initialize_variables(nelr, ff_variable.as_slice());
+    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let areas = HerculesImmBox::from(areas.as_slice());
+    let elements_surrounding_elements =
+        HerculesImmBox::from(elements_surrounding_elements.as_slice());
+    let normals = HerculesImmBox::from(normals.as_slice());
+    let ff_variable = HerculesImmBox::from(ff_variable.as_slice());
+    let ff_fc_density_energy = vec![ // x, y, z fields copied into a flat Vec before boxing
+        ff_fc_density_energy.x,
+        ff_fc_density_energy.y,
+        ff_fc_density_energy.z,
+    ];
+    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
+    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
+    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
+    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
+    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
+    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
+    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+    group.bench_function("cfd bench euler", |b| {
+        b.iter(|| {
+            async_std::task::block_on(async {
+                r.run(
+                    nelr as u64,
+                    iterations as u64,
+                    variables.to(),
+                    areas.to(),
+                    elements_surrounding_elements.to(),
+                    normals.to(),
+                    ff_variable.to(),
+                    ff_fc_density_energy.to(),
+                    ff_fc_momentum_x.to(),
+                    ff_fc_momentum_y.to(),
+                    ff_fc_momentum_z.to(),
+                )
+                .await
+            });
+        })
+    });
+    // Same setup repeated from scratch, this time for the pre_euler runner.
+    let mut r = runner!(pre_euler);
+    let data_file = "data/fvcorr.domn.097K".to_string();
+    let iterations = 1;
+    let block_size = 16;
+    let FarFieldConditions {
+        ff_variable,
+        ff_fc_momentum_x,
+        ff_fc_momentum_y,
+        ff_fc_momentum_z,
+        ff_fc_density_energy,
+    } = set_far_field_conditions();
+    let GeometryData {
+        nelr,
+        areas,
+        elements_surrounding_elements,
+        normals,
+    } = read_domain_geometry(data_file, block_size);
+    let mut variables = initialize_variables(nelr, ff_variable.as_slice());
+    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let areas = HerculesImmBox::from(areas.as_slice());
+    let elements_surrounding_elements =
+        HerculesImmBox::from(elements_surrounding_elements.as_slice());
+    let normals = HerculesImmBox::from(normals.as_slice());
+    let ff_variable = HerculesImmBox::from(ff_variable.as_slice());
+    let ff_fc_density_energy = vec![
+        ff_fc_density_energy.x,
+        ff_fc_density_energy.y,
+        ff_fc_density_energy.z,
+    ];
+    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
+    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
+    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
+    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
+    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
+    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
+    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+    group.bench_function("cfd bench pre-euler", |b| {
+        b.iter(|| {
+            async_std::task::block_on(async {
+                r.run(
+                    nelr as u64,
+                    iterations as u64,
+                    variables.to(),
+                    areas.to(),
+                    elements_surrounding_elements.to(),
+                    normals.to(),
+                    ff_variable.to(),
+                    ff_fc_density_energy.to(),
+                    ff_fc_momentum_x.to(),
+                    ff_fc_momentum_y.to(),
+                    ff_fc_momentum_z.to(),
+                )
+                .await
+            });
+        })
+    });
+}
+
+criterion_group!(benches, cfd_bench);
+criterion_main!(benches);
diff --git a/juno_samples/rodinia/cfd/src/lib.rs b/juno_samples/rodinia/cfd/src/lib.rs
new file mode 100644
index 00000000..39384c0d
--- /dev/null
+++ b/juno_samples/rodinia/cfd/src/lib.rs
@@ -0,0 +1,236 @@
+#![feature(concat_idents)]
+mod rust_cfd;
+mod setup;
+
+use clap::Parser;
+
+pub use crate::setup::*;
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("euler");
+juno_build::juno!("pre_euler");
+// Command-line arguments for the CFD sample (parsed with clap's derive API).
+#[derive(Parser)]
+#[clap(author, version, about, long_about = None)]
+pub struct CFDInputs {
+    pub data_file: String, // handed to read_domain_geometry by cfd_harness
+    pub iterations: usize, // passed to the kernel runner (as u64)
+    pub block_size: usize, // cfd_harness asserts this is a multiple of 16
+    #[clap(short = None, long = Some("pre-euler"))]
+    pub pre_euler: bool, // selects the pre_euler kernel instead of euler
+}
+/// Runs the Juno `euler` kernel and returns its output copied into a `Vec<f32>`.
+fn run_euler(
+    nelr: usize,
+    iterations: usize,
+    mut variables: AlignedSlice<f32>,
+    areas: &[f32],
+    elements_surrounding_elements: &[i32],
+    normals: &[f32],
+    ff_variable: &[f32],
+    ff_fc_density_energy: &Float3,
+    ff_fc_momentum_x: &Float3,
+    ff_fc_momentum_y: &Float3,
+    ff_fc_momentum_z: &Float3,
+) -> Vec<f32> {
+    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let areas = HerculesImmBox::from(areas);
+    let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
+    let normals = HerculesImmBox::from(normals);
+    let ff_variable = HerculesImmBox::from(ff_variable);
+
+    // TODO: Make hercules box handle structs, for now we'll copy into a vec
+    let ff_fc_density_energy = vec![
+        ff_fc_density_energy.x,
+        ff_fc_density_energy.y,
+        ff_fc_density_energy.z,
+    ];
+    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
+    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
+    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
+    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
+    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
+    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
+    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+
+    let mut runner = runner!(euler);
+    // Block on the async run, then copy the returned buffer out through a HerculesMutBox.
+    HerculesMutBox::from(async_std::task::block_on(async {
+        runner
+            .run(
+                nelr as u64,
+                iterations as u64,
+                variables.to(),
+                areas.to(),
+                elements_surrounding_elements.to(),
+                normals.to(),
+                ff_variable.to(),
+                ff_fc_density_energy.to(),
+                ff_fc_momentum_x.to(),
+                ff_fc_momentum_y.to(),
+                ff_fc_momentum_z.to(),
+            )
+            .await
+    }))
+    .as_slice()
+    .to_vec()
+}
+/// Runs the Juno `pre_euler` kernel and returns its output copied into a `Vec<f32>`.
+fn run_pre_euler(
+    nelr: usize,
+    iterations: usize,
+    mut variables: AlignedSlice<f32>,
+    areas: &[f32],
+    elements_surrounding_elements: &[i32],
+    normals: &[f32],
+    ff_variable: &[f32],
+    ff_fc_density_energy: &Float3,
+    ff_fc_momentum_x: &Float3,
+    ff_fc_momentum_y: &Float3,
+    ff_fc_momentum_z: &Float3,
+) -> Vec<f32> {
+    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
+    let areas = HerculesImmBox::from(areas);
+    let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
+    let normals = HerculesImmBox::from(normals);
+    let ff_variable = HerculesImmBox::from(ff_variable);
+
+    // TODO: Make hercules box handle structs, for now we'll copy into a vec
+    let ff_fc_density_energy = vec![
+        ff_fc_density_energy.x,
+        ff_fc_density_energy.y,
+        ff_fc_density_energy.z,
+    ];
+    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
+    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
+    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
+    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
+    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
+    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
+    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
+
+    let mut runner = runner!(pre_euler);
+    // NOTE(review): unlike run_euler, the mutable handle is taken before the async block; reason not visible here.
+    let variables = variables.to();
+    // Block on the async run, then copy the returned buffer out through a HerculesMutBox.
+    HerculesMutBox::from(async_std::task::block_on(async {
+        runner
+            .run(
+                nelr as u64,
+                iterations as u64,
+                variables,
+                areas.to(),
+                elements_surrounding_elements.to(),
+                normals.to(),
+                ff_variable.to(),
+                ff_fc_density_energy.to(),
+                ff_fc_momentum_x.to(),
+                ff_fc_momentum_y.to(),
+                ff_fc_momentum_z.to(),
+            )
+            .await
+    }))
+    .as_slice()
+    .to_vec()
+}
+/// Returns `true` when `x` and `y` differ by less than the absolute tolerance `1e-5`.
+fn compare_float(x: f32, y: f32) -> bool {
+    (x - y).abs() < 1e-5 // a NaN difference compares false, so NaN never matches
+}
+/// Returns `true` when the slices have equal length and every element pair passes `compare_float`.
+fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
+    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
+}
+/// Runs the selected CFD kernel through Juno and through `rust_cfd`, panicking if the results differ.
+pub fn cfd_harness(args: CFDInputs) {
+    let CFDInputs {
+        data_file,
+        iterations,
+        block_size,
+        pre_euler,
+    } = args;
+
+    assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16");
+
+    let FarFieldConditions {
+        ff_variable,
+        ff_fc_momentum_x,
+        ff_fc_momentum_y,
+        ff_fc_momentum_z,
+        ff_fc_density_energy,
+    } = set_far_field_conditions();
+
+    let GeometryData {
+        nelr,
+        areas,
+        elements_surrounding_elements,
+        normals,
+    } = read_domain_geometry(data_file, block_size);
+
+    let variables = initialize_variables(nelr, ff_variable.as_slice());
+    // The Juno run gets a clone of the initial state; the original is moved into the rust_cfd run below.
+    let res_juno = if pre_euler {
+        run_pre_euler(
+            nelr,
+            iterations,
+            variables.clone(),
+            areas.as_slice(),
+            elements_surrounding_elements.as_slice(),
+            normals.as_slice(),
+            ff_variable.as_slice(),
+            &ff_fc_density_energy,
+            &ff_fc_momentum_x,
+            &ff_fc_momentum_y,
+            &ff_fc_momentum_z,
+        )
+    } else {
+        run_euler(
+            nelr,
+            iterations,
+            variables.clone(),
+            areas.as_slice(),
+            elements_surrounding_elements.as_slice(),
+            normals.as_slice(),
+            ff_variable.as_slice(),
+            &ff_fc_density_energy,
+            &ff_fc_momentum_x,
+            &ff_fc_momentum_y,
+            &ff_fc_momentum_z,
+        )
+    };
+    let res_rust = if pre_euler {
+        rust_cfd::pre_euler(
+            nelr,
+            iterations,
+            variables,
+            areas.as_slice(),
+            elements_surrounding_elements.as_slice(),
+            normals.as_slice(),
+            ff_variable.as_slice(),
+            &ff_fc_density_energy,
+            &ff_fc_momentum_x,
+            &ff_fc_momentum_y,
+            &ff_fc_momentum_z,
+        )
+    } else {
+        rust_cfd::euler(
+            nelr,
+            iterations,
+            variables,
+            areas.as_slice(),
+            elements_surrounding_elements.as_slice(),
+            normals.as_slice(),
+            ff_variable.as_slice(),
+            &ff_fc_density_energy,
+            &ff_fc_momentum_x,
+            &ff_fc_momentum_y,
+            &ff_fc_momentum_z,
+        )
+    };
+    // On mismatch, the inner assert_eq reports differing lengths first; otherwise the generic panic fires.
+    if !compare_floats(&res_juno, res_rust.as_slice()) {
+        assert_eq!(res_juno.len(), res_rust.as_slice().len());
+        panic!("Mismatch in results");
+    }
+}
diff --git a/juno_samples/rodinia/cfd/src/main.rs b/juno_samples/rodinia/cfd/src/main.rs
index fab241fa..277a3edb 100644
--- a/juno_samples/rodinia/cfd/src/main.rs
+++ b/juno_samples/rodinia/cfd/src/main.rs
@@ -1,239 +1,6 @@
-#![feature(concat_idents)]
-mod rust_cfd;
-mod setup;
-
 use clap::Parser;
 
-use crate::setup::*;
-
-use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
-
-juno_build::juno!("euler");
-juno_build::juno!("pre_euler");
-
-#[derive(Parser)]
-#[clap(author, version, about, long_about = None)]
-struct CFDInputs {
-    data_file: String,
-    iterations: usize,
-    block_size: usize,
-    #[clap(short = None, long = Some("pre-euler"))]
-    pre_euler: bool,
-}
-
-fn run_euler(
-    nelr: usize,
-    iterations: usize,
-    mut variables: AlignedSlice<f32>,
-    areas: &[f32],
-    elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
-    ff_fc_density_energy: &Float3,
-    ff_fc_momentum_x: &Float3,
-    ff_fc_momentum_y: &Float3,
-    ff_fc_momentum_z: &Float3,
-) -> Vec<f32> {
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
-    let areas = HerculesImmBox::from(areas);
-    let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
-    let normals = HerculesImmBox::from(normals);
-    let ff_variable = HerculesImmBox::from(ff_variable);
-
-    // TODO: Make hercules box handle structs, for now we'll copy into a vec
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
-
-    let mut runner = runner!(euler);
-
-    HerculesMutBox::from(async_std::task::block_on(async {
-        runner
-            .run(
-                nelr as u64,
-                iterations as u64,
-                variables.to(),
-                areas.to(),
-                elements_surrounding_elements.to(),
-                normals.to(),
-                ff_variable.to(),
-                ff_fc_density_energy.to(),
-                ff_fc_momentum_x.to(),
-                ff_fc_momentum_y.to(),
-                ff_fc_momentum_z.to(),
-            )
-            .await
-    }))
-    .as_slice()
-    .to_vec()
-}
-
-fn run_pre_euler(
-    nelr: usize,
-    iterations: usize,
-    mut variables: AlignedSlice<f32>,
-    areas: &[f32],
-    elements_surrounding_elements: &[i32],
-    normals: &[f32],
-    ff_variable: &[f32],
-    ff_fc_density_energy: &Float3,
-    ff_fc_momentum_x: &Float3,
-    ff_fc_momentum_y: &Float3,
-    ff_fc_momentum_z: &Float3,
-) -> Vec<f32> {
-    let mut variables = HerculesMutBox::from(variables.as_mut_slice());
-    let areas = HerculesImmBox::from(areas);
-    let elements_surrounding_elements = HerculesImmBox::from(elements_surrounding_elements);
-    let normals = HerculesImmBox::from(normals);
-    let ff_variable = HerculesImmBox::from(ff_variable);
-
-    // TODO: Make hercules box handle structs, for now we'll copy into a vec
-    let ff_fc_density_energy = vec![
-        ff_fc_density_energy.x,
-        ff_fc_density_energy.y,
-        ff_fc_density_energy.z,
-    ];
-    let ff_fc_density_energy = HerculesImmBox::from(ff_fc_density_energy.as_slice());
-    let ff_fc_momentum_x = vec![ff_fc_momentum_x.x, ff_fc_momentum_x.y, ff_fc_momentum_x.z];
-    let ff_fc_momentum_x = HerculesImmBox::from(ff_fc_momentum_x.as_slice());
-    let ff_fc_momentum_y = vec![ff_fc_momentum_y.x, ff_fc_momentum_y.y, ff_fc_momentum_y.z];
-    let ff_fc_momentum_y = HerculesImmBox::from(ff_fc_momentum_y.as_slice());
-    let ff_fc_momentum_z = vec![ff_fc_momentum_z.x, ff_fc_momentum_z.y, ff_fc_momentum_z.z];
-    let ff_fc_momentum_z = HerculesImmBox::from(ff_fc_momentum_z.as_slice());
-
-    let mut runner = runner!(pre_euler);
-
-    let variables = variables.to();
-
-    HerculesMutBox::from(async_std::task::block_on(async {
-        runner
-            .run(
-                nelr as u64,
-                iterations as u64,
-                variables,
-                areas.to(),
-                elements_surrounding_elements.to(),
-                normals.to(),
-                ff_variable.to(),
-                ff_fc_density_energy.to(),
-                ff_fc_momentum_x.to(),
-                ff_fc_momentum_y.to(),
-                ff_fc_momentum_z.to(),
-            )
-            .await
-    }))
-    .as_slice()
-    .to_vec()
-}
-
-fn compare_float(x: f32, y: f32) -> bool {
-    (x - y).abs() < 1e-5
-}
-
-fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
-    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
-}
-
-fn cfd_harness(args: CFDInputs) {
-    let CFDInputs {
-        data_file,
-        iterations,
-        block_size,
-        pre_euler,
-    } = args;
-
-    assert!(block_size % 16 == 0, "Hercules expects all arrays to be 64-byte aligned, cfd uses structs of arrays that are annoying to deal with if the block_size is not a multiple of 16");
-
-    let FarFieldConditions {
-        ff_variable,
-        ff_fc_momentum_x,
-        ff_fc_momentum_y,
-        ff_fc_momentum_z,
-        ff_fc_density_energy,
-    } = set_far_field_conditions();
-
-    let GeometryData {
-        nelr,
-        areas,
-        elements_surrounding_elements,
-        normals,
-    } = read_domain_geometry(data_file, block_size);
-
-    let variables = initialize_variables(nelr, ff_variable.as_slice());
-
-    let res_juno = if pre_euler {
-        run_pre_euler(
-            nelr,
-            iterations,
-            variables.clone(),
-            areas.as_slice(),
-            elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
-            &ff_fc_density_energy,
-            &ff_fc_momentum_x,
-            &ff_fc_momentum_y,
-            &ff_fc_momentum_z,
-        )
-    } else {
-        run_euler(
-            nelr,
-            iterations,
-            variables.clone(),
-            areas.as_slice(),
-            elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
-            &ff_fc_density_energy,
-            &ff_fc_momentum_x,
-            &ff_fc_momentum_y,
-            &ff_fc_momentum_z,
-        )
-    };
-    let res_rust = if pre_euler {
-        rust_cfd::pre_euler(
-            nelr,
-            iterations,
-            variables,
-            areas.as_slice(),
-            elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
-            &ff_fc_density_energy,
-            &ff_fc_momentum_x,
-            &ff_fc_momentum_y,
-            &ff_fc_momentum_z,
-        )
-    } else {
-        rust_cfd::euler(
-            nelr,
-            iterations,
-            variables,
-            areas.as_slice(),
-            elements_surrounding_elements.as_slice(),
-            normals.as_slice(),
-            ff_variable.as_slice(),
-            &ff_fc_density_energy,
-            &ff_fc_momentum_x,
-            &ff_fc_momentum_y,
-            &ff_fc_momentum_z,
-        )
-    };
-
-    if !compare_floats(&res_juno, res_rust.as_slice()) {
-        assert_eq!(res_juno.len(), res_rust.as_slice().len());
-        panic!("Mismatch in results");
-    }
-}
+use juno_cfd::{cfd_harness, CFDInputs};
 
 fn main() {
     let args = CFDInputs::parse();
-- 
GitLab


From 747b2f41304c0e5e7596bdcde35e45f5c7f73fd3 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Tue, 25 Feb 2025 21:26:13 -0600
Subject: [PATCH 5/5] srad bench

---
 Cargo.lock                                    |   1 +
 juno_samples/rodinia/srad/Cargo.toml          |  10 ++
 .../rodinia/srad/benches/srad_bench.rs        |  62 +++++++++
 juno_samples/rodinia/srad/src/lib.rs          | 123 ++++++++++++++++++
 juno_samples/rodinia/srad/src/main.rs         | 122 +----------------
 5 files changed, 197 insertions(+), 121 deletions(-)
 create mode 100644 juno_samples/rodinia/srad/benches/srad_bench.rs
 create mode 100644 juno_samples/rodinia/srad/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 170a7f3e..61cde7f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1493,6 +1493,7 @@ version = "0.1.0"
 dependencies = [
  "async-std",
  "clap",
+ "criterion",
  "hercules_rt",
  "juno_build",
  "nom 8.0.0",
diff --git a/juno_samples/rodinia/srad/Cargo.toml b/juno_samples/rodinia/srad/Cargo.toml
index e41a8871..facf8c3b 100644
--- a/juno_samples/rodinia/srad/Cargo.toml
+++ b/juno_samples/rodinia/srad/Cargo.toml
@@ -8,6 +8,9 @@ edition = "2021"
 name = "juno_srad"
 path = "src/main.rs"
 
+[lib]
+path = "src/lib.rs"
+
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
 
@@ -21,3 +24,10 @@ async-std = "*"
 clap = { version = "*", features = ["derive"] }
 with_builtin_macros = "0.1.0"
 nom = "*"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "srad_bench"
+harness = false
diff --git a/juno_samples/rodinia/srad/benches/srad_bench.rs b/juno_samples/rodinia/srad/benches/srad_bench.rs
new file mode 100644
index 00000000..d3274540
--- /dev/null
+++ b/juno_samples/rodinia/srad/benches/srad_bench.rs
@@ -0,0 +1,62 @@
+#![feature(concat_idents)]
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("srad");
+
+use juno_srad::*;
+/// Criterion benchmark that repeatedly runs the Juno `srad` kernel on a resized `data/image.pgm`.
+fn srad_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("srad bench");
+
+    let mut r = runner!(srad);
+    let niter = 100; // fixed benchmark configuration, passed to the kernel below
+    let lambda = 0.5;
+    let nrows = 502; // the input image is resized to nrows x ncols
+    let ncols = 458;
+    let image = "data/image.pgm".to_string(); // relative path, resolved against the working directory
+    let Image {
+        image: image_ori,
+        max,
+        rows: image_ori_rows,
+        cols: image_ori_cols,
+    } = read_graphics(image);
+    let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols);
+    let mut image_h = HerculesMutBox::from(image); // `image` is not used again, so move it instead of cloning
+    let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>(); // row index i - 1
+    let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>(); // row index i + 1
+    let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>(); // column index j - 1
+    let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>(); // column index j + 1
+    // Fix boundary conditions: clamp the out-of-range first/last entries to the edge index
+    iN[0] = 0;
+    iS[nrows - 1] = (nrows - 1) as i32;
+    jW[0] = 0;
+    jE[ncols - 1] = (ncols - 1) as i32;
+    let iN_h = HerculesImmBox::from(iN.as_slice());
+    let iS_h = HerculesImmBox::from(iS.as_slice());
+    let jW_h = HerculesImmBox::from(jW.as_slice());
+    let jE_h = HerculesImmBox::from(jE.as_slice());
+    group.bench_function("srad bench", |b| {
+        b.iter(|| { // NOTE(review): image_h is reused across iterations; confirm that is intended
+            async_std::task::block_on(async {
+                r.run(
+                    nrows as u64,
+                    ncols as u64,
+                    niter as u64,
+                    image_h.to(),
+                    iN_h.to(),
+                    iS_h.to(),
+                    jW_h.to(),
+                    jE_h.to(),
+                    max,
+                    lambda,
+                )
+                .await
+            });
+        })
+    });
+}
+
+criterion_group!(benches, srad_bench);
+criterion_main!(benches);
diff --git a/juno_samples/rodinia/srad/src/lib.rs b/juno_samples/rodinia/srad/src/lib.rs
new file mode 100644
index 00000000..d6366007
--- /dev/null
+++ b/juno_samples/rodinia/srad/src/lib.rs
@@ -0,0 +1,123 @@
+#![feature(concat_idents)]
+mod graphics;
+mod rust_srad;
+
+pub use graphics::*;
+
+use clap::Parser;
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("srad");
+// Command-line arguments for the SRAD sample (parsed with clap's derive API).
+#[derive(Parser)]
+#[clap(author, version, about, long_about = None)]
+pub struct SRADInputs {
+    pub niter: usize, // passed to the kernel (as u64) and to rust_srad::srad
+    pub lambda: f32, // passed unchanged to the kernel and to rust_srad::srad
+    pub nrows: usize, // row count the input image is resized to
+    pub ncols: usize, // column count the input image is resized to
+    pub image: String, // handed to read_graphics by srad_harness
+    #[clap(short, long)]
+    pub output: Option<String>, // when set, the Juno result is written here
+    #[clap(short, long)]
+    pub verify: bool, // also run rust_srad::srad and compare against it
+    #[clap(long = "output-verify", value_name = "PATH")]
+    pub output_verify: Option<String>, // when set and verify is on, the rust_srad result is written here
+}
+/// Runs the Juno `srad` kernel on the resized input image, optionally writing and verifying the result.
+pub fn srad_harness(args: SRADInputs) {
+    async_std::task::block_on(async {
+        let SRADInputs {
+            niter,
+            lambda,
+            nrows,
+            ncols,
+            image,
+            output,
+            verify,
+            output_verify,
+        } = args;
+
+        let Image {
+            image: image_ori,
+            max,
+            rows: image_ori_rows,
+            cols: image_ori_cols,
+        } = read_graphics(image);
+        let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols);
+        let mut image_h = HerculesMutBox::from(image.clone()); // clone: `image` is reused by the verify path
+        // Neighbour index tables: i - 1 / i + 1 per row and j - 1 / j + 1 per column.
+        let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>();
+        let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>();
+        let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>();
+        let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>();
+
+        // Fix boundary conditions: clamp the out-of-range first/last entries to the edge index
+        iN[0] = 0; // indexing panics when nrows == 0
+        iS[nrows - 1] = (nrows - 1) as i32;
+        jW[0] = 0; // indexing panics when ncols == 0
+        jE[ncols - 1] = (ncols - 1) as i32;
+
+        let iN_h = HerculesImmBox::from(iN.as_slice());
+        let iS_h = HerculesImmBox::from(iS.as_slice());
+        let jW_h = HerculesImmBox::from(jW.as_slice());
+        let jE_h = HerculesImmBox::from(jE.as_slice());
+
+        let mut runner = runner!(srad);
+        let result: Vec<f32> = HerculesMutBox::from(
+            runner
+                .run(
+                    nrows as u64,
+                    ncols as u64,
+                    niter as u64,
+                    image_h.to(),
+                    iN_h.to(),
+                    iS_h.to(),
+                    jW_h.to(),
+                    jE_h.to(),
+                    max,
+                    lambda,
+                )
+                .await,
+        )
+        .as_slice()
+        .to_vec();
+
+        if let Some(output) = output {
+            write_graphics(output, &result, nrows, ncols, max);
+        }
+
+        if verify {
+            let mut rust_result = image;
+            rust_srad::srad(
+                nrows,
+                ncols,
+                niter,
+                &mut rust_result,
+                &iN,
+                &iS,
+                &jW,
+                &jE,
+                max,
+                lambda,
+            );
+
+            if let Some(output) = output_verify {
+                write_graphics(output, &rust_result, nrows, ncols, max);
+            }
+            // Values are cast to i32 before comparing; zip stops at the shorter of the two results.
+            let max_diff = result
+                .iter()
+                .zip(rust_result.iter())
+                .map(|(a, b)| (*a as i32 - *b as i32).abs())
+                .max()
+                .unwrap_or(0);
+            assert!(
+                max_diff <= 1,
+                "Verification failed: maximum pixel difference of {} exceeds threshold of 1",
+                max_diff
+            );
+        }
+    })
+}
diff --git a/juno_samples/rodinia/srad/src/main.rs b/juno_samples/rodinia/srad/src/main.rs
index 1b99b41a..87d1e7e8 100644
--- a/juno_samples/rodinia/srad/src/main.rs
+++ b/juno_samples/rodinia/srad/src/main.rs
@@ -1,126 +1,6 @@
-#![feature(concat_idents)]
-mod graphics;
-mod rust_srad;
-
-use graphics::*;
-
 use clap::Parser;
 
-use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
-
-juno_build::juno!("srad");
-
-#[derive(Parser)]
-#[clap(author, version, about, long_about = None)]
-struct SRADInputs {
-    niter: usize,
-    lambda: f32,
-    nrows: usize,
-    ncols: usize,
-    image: String,
-    #[clap(short, long)]
-    output: Option<String>,
-    #[clap(short, long)]
-    verify: bool,
-    #[clap(long = "output-verify", value_name = "PATH")]
-    output_verify: Option<String>,
-}
-
-fn srad_harness(args: SRADInputs) {
-    async_std::task::block_on(async {
-        let SRADInputs {
-            niter,
-            lambda,
-            nrows,
-            ncols,
-            image,
-            output,
-            verify,
-            output_verify,
-        } = args;
-
-        let Image {
-            image: image_ori,
-            max,
-            rows: image_ori_rows,
-            cols: image_ori_cols,
-        } = read_graphics(image);
-        let image = resize(&image_ori, image_ori_rows, image_ori_cols, nrows, ncols);
-        let mut image_h = HerculesMutBox::from(image.clone());
-
-        let mut iN = (0..nrows).map(|i| i as i32 - 1).collect::<Vec<_>>();
-        let mut iS = (0..nrows).map(|i| i as i32 + 1).collect::<Vec<_>>();
-        let mut jW = (0..ncols).map(|j| j as i32 - 1).collect::<Vec<_>>();
-        let mut jE = (0..ncols).map(|j| j as i32 + 1).collect::<Vec<_>>();
-
-        // Fix boundary conditions
-        iN[0] = 0;
-        iS[nrows - 1] = (nrows - 1) as i32;
-        jW[0] = 0;
-        jE[ncols - 1] = (ncols - 1) as i32;
-
-        let iN_h = HerculesImmBox::from(iN.as_slice());
-        let iS_h = HerculesImmBox::from(iS.as_slice());
-        let jW_h = HerculesImmBox::from(jW.as_slice());
-        let jE_h = HerculesImmBox::from(jE.as_slice());
-
-        let mut runner = runner!(srad);
-        let result: Vec<f32> = HerculesMutBox::from(
-            runner
-                .run(
-                    nrows as u64,
-                    ncols as u64,
-                    niter as u64,
-                    image_h.to(),
-                    iN_h.to(),
-                    iS_h.to(),
-                    jW_h.to(),
-                    jE_h.to(),
-                    max,
-                    lambda,
-                )
-                .await,
-        )
-        .as_slice()
-        .to_vec();
-
-        if let Some(output) = output {
-            write_graphics(output, &result, nrows, ncols, max);
-        }
-
-        if verify {
-            let mut rust_result = image;
-            rust_srad::srad(
-                nrows,
-                ncols,
-                niter,
-                &mut rust_result,
-                &iN,
-                &iS,
-                &jW,
-                &jE,
-                max,
-                lambda,
-            );
-
-            if let Some(output) = output_verify {
-                write_graphics(output, &rust_result, nrows, ncols, max);
-            }
-
-            let max_diff = result
-                .iter()
-                .zip(rust_result.iter())
-                .map(|(a, b)| (*a as i32 - *b as i32).abs())
-                .max()
-                .unwrap_or(0);
-            assert!(
-                max_diff <= 1,
-                "Verification failed: maximum pixel difference of {} exceeds threshold of 1",
-                max_diff
-            );
-        }
-    })
-}
+use juno_srad::*;
 
 fn main() {
     let args = SRADInputs::parse();
-- 
GitLab