diff --git a/Cargo.lock b/Cargo.lock
index c872be3ad7993859e29e1482866ce49865e3cc29..5916a17c66078cfb097ebda745b0b8b48dda1d31 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1238,6 +1238,7 @@ version = "0.1.0"
 dependencies = [
  "async-std",
  "clap",
+ "criterion",
  "hercules_rt",
  "juno_build",
  "nom 8.0.0",
diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml
index 729b3969c5d9acbbdc7d50ca056bc24ab20dd9f7..25185e0944d171c748f4d5f8e10e9646e5cce9eb 100644
--- a/juno_samples/rodinia/backprop/Cargo.toml
+++ b/juno_samples/rodinia/backprop/Cargo.toml
@@ -8,6 +8,9 @@ edition = "2021"
 name = "juno_backprop"
 path = "src/main.rs"
 
+[lib]
+path = "src/lib.rs"
+
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
 
@@ -22,3 +25,10 @@ clap = { version = "*", features = ["derive"] }
 with_builtin_macros = "0.1.0"
 nom = "*"
 rand = "0.9.0"
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+
+[[bench]]
+name = "backprop_bench"
+harness = false
diff --git a/juno_samples/rodinia/backprop/benches/backprop_bench.rs b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
new file mode 100644
index 0000000000000000000000000000000000000000..40cad98f6388e41519757c8b265c19c208809759
--- /dev/null
+++ b/juno_samples/rodinia/backprop/benches/backprop_bench.rs
@@ -0,0 +1,76 @@
+#![feature(concat_idents)]
+// Criterion benchmark that times one call to the Juno-compiled `backprop`
+// entry point on a fixed-size, deterministically seeded network.
+use criterion::{criterion_group, criterion_main, Criterion};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+juno_build::juno!("backprop");
+
+/// Benchmarks the Juno `backprop` runner under Criterion.
+///
+/// All inputs are built once, outside the timed closure; the closure only
+/// drives `r.run(..)` to completion with `async_std::task::block_on`. The
+/// mutable weight boxes are created once and reused by every iteration.
+fn backprop_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("backprop bench");
+    group.sample_size(10);
+
+    // Fixed seed so every benchmark run generates the same weights.
+    let mut rng = StdRng::seed_from_u64(7);
+
+    let input_n = 65536;
+    let hidden_n = 16;
+    let output_n = 1;
+
+    let mut input_vals = vec![0.0f32; input_n + 1];
+    input_vals[0] = 1.0;
+
+    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
+    let target = vec![0.1f32; output_n + 1];
+
+    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+
+    let input_prev_weights = vec![0.0f32; (input_n + 1) * (hidden_n + 1)];
+    let hidden_prev_weights = vec![0.0f32; (hidden_n + 1) * (output_n + 1)];
+
+    let mut r = runner!(backprop);
+    let input_vals = HerculesImmBox::from(&input_vals as &[f32]);
+    let target = HerculesImmBox::from(&target as &[f32]);
+    // The weight vectors are owned and not used again, so move them into the
+    // boxes rather than copying each one with `to_vec()` first.
+    let mut input_weights = HerculesMutBox::from(input_weights);
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
+
+    group.bench_function("backprop bench", |b| {
+        b.iter(|| {
+            async_std::task::block_on(async {
+                r.run(
+                    input_n as u64,
+                    hidden_n as u64,
+                    output_n as u64,
+                    input_vals.to(),
+                    input_weights.to(),
+                    hidden_weights.to(),
+                    target.to(),
+                    input_prev_weights.to(),
+                    hidden_prev_weights.to(),
+                )
+                .await
+            });
+        })
+    });
+    group.finish();
+}
+
+criterion_group!(benches, backprop_bench);
+criterion_main!(benches);
diff --git a/juno_samples/rodinia/backprop/src/lib.rs b/juno_samples/rodinia/backprop/src/lib.rs
new file mode 100644
index 0000000000000000000000000000000000000000..e2fc2ad5b9cd864c7a7c956bc1091eab350687b4
--- /dev/null
+++ b/juno_samples/rodinia/backprop/src/lib.rs
@@ -0,0 +1,187 @@
+#![feature(concat_idents)]
+
+juno_build::juno!("backprop");
+
+mod rust_backprop;
+
+use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
+
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use clap::Parser;
+
+// Command-line arguments for the backprop sample. Plain `//` comments are used
+// here because clap's derive macro turns `///` doc comments into help text.
+#[derive(Parser)]
+#[clap(author, version, about, long_about = None)]
+pub struct BackpropInputs {
+    // Passed to the harness as `input_n`.
+    pub layer_size: usize,
+}
+
+/// Runs the Juno-compiled `backprop` entry point once and copies its results out.
+///
+/// The immutable inputs are wrapped in `HerculesImmBox` and the four weight
+/// slices are copied into `HerculesMutBox`es before the call, so the caller's
+/// slices are left untouched.
+///
+/// Returns `(out_err, hid_err, input_weights, hidden_weights,
+/// input_prev_weights, hidden_prev_weights)`.
+fn run_backprop(
+    input_n: u64,
+    hidden_n: u64,
+    output_n: u64,
+    input_vals: &[f32],
+    input_weights: &[f32],
+    hidden_weights: &[f32],
+    target: &[f32],
+    input_prev_weights: &[f32],
+    hidden_prev_weights: &[f32],
+) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
+    let input_vals = HerculesImmBox::from(input_vals);
+    let target = HerculesImmBox::from(target);
+
+    let mut input_weights = HerculesMutBox::from(input_weights.to_vec());
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec());
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec());
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec());
+
+    let mut runner = runner!(backprop);
+    // The runner yields the weights as (input, input_prev, hidden, hidden_prev);
+    // the tuple returned at the end of this function reorders them.
+    let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) =
+        async_std::task::block_on(async {
+            runner
+                .run(
+                    input_n,
+                    hidden_n,
+                    output_n,
+                    input_vals.to(),
+                    input_weights.to(),
+                    hidden_weights.to(),
+                    target.to(),
+                    input_prev_weights.to(),
+                    hidden_prev_weights.to(),
+                )
+                .await
+        });
+    let mut input_weights = HerculesMutBox::from(input_weights);
+    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
+    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
+    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
+
+    (
+        out_err,
+        hid_err,
+        input_weights.as_slice().to_vec(),
+        hidden_weights.as_slice().to_vec(),
+        input_prev_weights.as_slice().to_vec(),
+        hidden_prev_weights.as_slice().to_vec(),
+    )
+}
+
+/// Returns `true` when `x` and `y` differ by less than `1e-5` in absolute value.
+fn compare_float(x: f32, y: f32) -> bool {
+    (x - y).abs() < 1e-5
+}
+
+/// Returns `true` when both slices have the same length and every pair of
+/// corresponding elements passes `compare_float`.
+fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
+    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
+}
+
+/// Checks the Juno `backprop` implementation against the Rust reference.
+///
+/// Builds inputs for `input_n = layer_size`, `hidden_n = 16` and `output_n = 1`
+/// from a fixed RNG seed, runs both implementations on the same data and
+/// compares the two error values and all four weight arrays.
+///
+/// # Panics
+///
+/// Panics if any of the compared results differ.
+pub fn backprop_harness(args: BackpropInputs) {
+    let BackpropInputs { layer_size } = args;
+
+    let mut rng = StdRng::seed_from_u64(7);
+
+    let input_n = layer_size;
+    let hidden_n = 16;
+    let output_n = 1;
+
+    let mut input_vals = vec![0.0; input_n + 1];
+    input_vals[0] = 1.0;
+
+    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
+    let target = vec![0.1; output_n + 1];
+
+    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
+        .map(|_| rng.random::<f32>())
+        .collect::<Vec<_>>();
+
+    let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)];
+    let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)];
+
+    let (
+        juno_out_err,
+        juno_hid_err,
+        juno_input_weights,
+        juno_hidden_weights,
+        juno_input_prev_weights,
+        juno_hidden_prev_weights,
+    ) = run_backprop(
+        input_n as u64,
+        hidden_n as u64,
+        output_n as u64,
+        &input_vals,
+        &input_weights,
+        &hidden_weights,
+        &target,
+        &input_prev_weights,
+        &hidden_prev_weights,
+    );
+
+    let (
+        rust_out_err,
+        rust_hid_err,
+        rust_input_weights,
+        rust_hidden_weights,
+        rust_input_prev_weights,
+        rust_hidden_prev_weights,
+    ) = rust_backprop::backprop(
+        input_n,
+        hidden_n,
+        output_n,
+        &input_vals,
+        input_weights,
+        hidden_weights,
+        &target,
+        input_prev_weights,
+        hidden_prev_weights,
+    );
+
+    assert!(
+        compare_float(juno_out_err, rust_out_err),
+        "out_err does not match after training: juno = {juno_out_err}, rust = {rust_out_err}"
+    );
+    assert!(
+        compare_float(juno_hid_err, rust_hid_err),
+        "hid_err does not match after training: juno = {juno_hid_err}, rust = {rust_hid_err}"
+    );
+    if !compare_floats(&juno_input_weights, &rust_input_weights) {
+        panic!("Input weights do not match after training");
+    }
+    if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) {
+        panic!("Hidden weights do not match after training");
+    }
+    if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) {
+        panic!("Input prev_weights do not match after training");
+    }
+    if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) {
+        panic!("Hidden prev_weights do not match after training");
+    }
+}
diff --git a/juno_samples/rodinia/backprop/src/main.rs b/juno_samples/rodinia/backprop/src/main.rs
index fa80a7a51cba6581f3305398f5e3f91da05ad877..bb0d13a10946d0eee282fb3d5c4ff9b556b127fd 100644
--- a/juno_samples/rodinia/backprop/src/main.rs
+++ b/juno_samples/rodinia/backprop/src/main.rs
@@ -1,159 +1,6 @@
-#![feature(concat_idents)]
-
-juno_build::juno!("backprop");
-
-mod rust_backprop;
-
-use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
-
-use rand::rngs::StdRng;
-use rand::{Rng, SeedableRng};
-
 use clap::Parser;
 
-#[derive(Parser)]
-#[clap(author, version, about, long_about = None)]
-struct BackpropInputs {
-    layer_size: usize,
-}
-
-fn run_backprop(
-    input_n: u64,
-    hidden_n: u64,
-    output_n: u64,
-    input_vals: &[f32],
-    input_weights: &[f32],
-    hidden_weights: &[f32],
-    target: &[f32],
-    input_prev_weights: &[f32],
-    hidden_prev_weights: &[f32],
-) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
-    let input_vals = HerculesImmBox::from(input_vals);
-    let target = HerculesImmBox::from(target);
-
-    let mut input_weights = HerculesMutBox::from(input_weights.to_vec());
-    let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec());
-    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec());
-    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec());
-
-    let mut runner = runner!(backprop);
-    let (out_err, hid_err, input_weights, input_prev_weights, hidden_weights, hidden_prev_weights) =
-        async_std::task::block_on(async {
-            runner
-                .run(
-                    input_n,
-                    hidden_n,
-                    output_n,
-                    input_vals.to(),
-                    input_weights.to(),
-                    hidden_weights.to(),
-                    target.to(),
-                    input_prev_weights.to(),
-                    hidden_prev_weights.to(),
-                )
-                .await
-        });
-    let mut input_weights = HerculesMutBox::from(input_weights);
-    let mut hidden_weights = HerculesMutBox::from(hidden_weights);
-    let mut input_prev_weights = HerculesMutBox::from(input_prev_weights);
-    let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights);
-
-    (
-        out_err,
-        hid_err,
-        input_weights.as_slice().to_vec(),
-        hidden_weights.as_slice().to_vec(),
-        input_prev_weights.as_slice().to_vec(),
-        hidden_prev_weights.as_slice().to_vec(),
-    )
-}
-
-fn compare_float(x: f32, y: f32) -> bool {
-    (x - y).abs() < 1e-5
-}
-
-fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
-    xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| compare_float(*x, *y))
-}
-
-fn backprop_harness(args: BackpropInputs) {
-    let BackpropInputs { layer_size } = args;
-
-    let mut rng = StdRng::seed_from_u64(7);
-
-    let input_n = layer_size;
-    let hidden_n = 16;
-    let output_n = 1;
-
-    let mut input_vals = vec![0.0; input_n + 1];
-    input_vals[0] = 1.0;
-
-    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
-    let target = vec![0.1; output_n + 1];
-
-    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
-        .map(|_| rng.random::<f32>())
-        .collect::<Vec<_>>();
-    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
-        .map(|_| rng.random::<f32>())
-        .collect::<Vec<_>>();
-
-    let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)];
-    let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)];
-
-    let (
-        juno_out_err,
-        juno_hid_err,
-        juno_input_weights,
-        juno_hidden_weights,
-        juno_input_prev_weights,
-        juno_hidden_prev_weights,
-    ) = run_backprop(
-        input_n as u64,
-        hidden_n as u64,
-        output_n as u64,
-        &input_vals,
-        &input_weights,
-        &hidden_weights,
-        &target,
-        &input_prev_weights,
-        &hidden_prev_weights,
-    );
-
-    let (
-        rust_out_err,
-        rust_hid_err,
-        rust_input_weights,
-        rust_hidden_weights,
-        rust_input_prev_weights,
-        rust_hidden_prev_weights,
-    ) = rust_backprop::backprop(
-        input_n,
-        hidden_n,
-        output_n,
-        &input_vals,
-        input_weights,
-        hidden_weights,
-        &target,
-        input_prev_weights,
-        hidden_prev_weights,
-    );
-
-    assert!(compare_float(juno_out_err, rust_out_err));
-    assert!(compare_float(juno_hid_err, rust_hid_err));
-    if !compare_floats(&juno_input_weights, &rust_input_weights) {
-        panic!("Input weights do not match after training");
-    }
-    if !compare_floats(&juno_hidden_weights, &rust_hidden_weights) {
-        panic!("Hidden weights do not match after training");
-    }
-    if !compare_floats(&juno_input_prev_weights, &rust_input_prev_weights) {
-        panic!("Input prev_weights do not match after training");
-    }
-    if !compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights) {
-        panic!("Hidden prev_weights do not match after training");
-    }
-}
+use juno_backprop::{backprop_harness, BackpropInputs};
 
 fn main() {
     let args = BackpropInputs::parse();