From 3ce50c43068f7a664d5f4d228376a65a1e995c2b Mon Sep 17 00:00:00 2001
From: Aaron Councilman <aaronjc4@illinois.edu>
Date: Tue, 4 Mar 2025 14:46:30 -0600
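Subject: [PATCH] Add sequential feature to benchmarks

Add an empty `seq` Cargo feature to the cava, edge_detection, backprop,
bfs, cfd, and srad samples. In each CPU schedule, the fork tiling and
outlining steps are now wrapped in `if !feature("seq")`, so they are
skipped when the feature is enabled. Inside those blocks the function
label is rebound to the outlined body, which lets the later fork-split
and unforkify calls use the same names whether or not `seq` is set.
For edge_detection's max_gradient, the `seq` path instead falls back to
a plain fork-split followed by unforkify.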

---
 juno_samples/cava/Cargo.toml                  |   3 +-
 juno_samples/cava/src/cpu.sch                 |  24 ++--
 juno_samples/edge_detection/Cargo.toml        |   1 +
 juno_samples/edge_detection/src/cpu.sch       | 114 +++++++++++-------
 juno_samples/rodinia/backprop/Cargo.toml      |   3 +-
 juno_samples/rodinia/backprop/src/cpu.sch     |  20 +--
 juno_samples/rodinia/bfs/Cargo.toml           |   1 +
 juno_samples/rodinia/bfs/src/cpu.sch          |  31 ++---
 juno_samples/rodinia/cfd/Cargo.toml           |   1 +
 juno_samples/rodinia/cfd/src/cpu_euler.sch    |  48 ++++----
 .../rodinia/cfd/src/cpu_pre_euler.sch         |  59 +++++----
 juno_samples/rodinia/srad/Cargo.toml          |   1 +
 juno_samples/rodinia/srad/src/cpu.sch         |  28 +++--
 13 files changed, 194 insertions(+), 140 deletions(-)

diff --git a/juno_samples/cava/Cargo.toml b/juno_samples/cava/Cargo.toml
index 17e9a1d3..bdf144da 100644
--- a/juno_samples/cava/Cargo.toml
+++ b/juno_samples/cava/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/lib.rs"
 
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [build-dependencies]
 juno_build = { path = "../../juno_build" }
@@ -30,4 +31,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
 
 [[bench]]
 name = "cava_bench"
-harness = false
\ No newline at end of file
+harness = false
diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch
index 6fc8adbb..ada2f552 100644
--- a/juno_samples/cava/src/cpu.sch
+++ b/juno_samples/cava/src/cpu.sch
@@ -113,14 +113,18 @@ fixpoint {
 simpl!(fuse4);
 array-slf(fuse4);
 simpl!(fuse4);
-let par = fuse4@image_loop \ fuse4@channel_loop;
-fork-tile[4, 1, false, false](par);
-fork-tile[8, 0, false, false](par);
-fork-interchange[1, 2](par);
-let split = fork-split(par);
-let fuse4_body = outline(split.cava_3.fj2);
-fork-coalesce(fuse4, fuse4_body);
-simpl!(fuse4, fuse4_body);
+
+if !feature("seq") {
+  let par = fuse4@image_loop \ fuse4@channel_loop;
+  fork-tile[4, 1, false, false](par);
+  fork-tile[8, 0, false, false](par);
+  fork-interchange[1, 2](par);
+  let split = fork-split(par);
+  let fuse4_body = outline(split.cava_3.fj2);
+  fork-coalesce(fuse4, fuse4_body);
+  simpl!(fuse4, fuse4_body);
+  fuse4 = fuse4_body;
+}
 
 no-memset(fuse5@res1);
 no-memset(fuse5@res2);
@@ -136,8 +140,8 @@ simpl!(fuse5);
 delete-uncalled(*);
 simpl!(*);
 
-fork-split(fuse1, fuse2, fuse3, fuse4_body, fuse5);
-unforkify(fuse1, fuse2, fuse3, fuse4_body, fuse5);
+fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
+unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
 
 simpl!(*);
 
diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml
index fa4ca1ff..8def7500 100644
--- a/juno_samples/edge_detection/Cargo.toml
+++ b/juno_samples/edge_detection/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 [features]
 opencv = ["dep:opencv"]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [[bin]]
 name = "juno_edge_detection"
diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch
index 3e1321c5..64fee6b6 100644
--- a/juno_samples/edge_detection/src/cpu.sch
+++ b/juno_samples/edge_detection/src/cpu.sch
@@ -24,14 +24,18 @@ predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
 predication(gaussian_smoothing);
 simpl!(gaussian_smoothing);
-let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
-fork-tile[4, 1, false, false](par);
-fork-tile[8, 0, false, false](par);
-fork-interchange[1, 2](par);
-let split = fork-split(par);
-let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
-fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
-simpl!(gaussian_smoothing, gaussian_smoothing_body);
+
+if !feature("seq") {
+  let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
+  fork-tile[4, 1, false, false](par);
+  fork-tile[8, 0, false, false](par);
+  fork-interchange[1, 2](par);
+  let split = fork-split(par);
+  let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
+  fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
+  simpl!(gaussian_smoothing, gaussian_smoothing_body);
+  gaussian_smoothing = gaussian_smoothing_body;
+}
 
 no-memset(laplacian_estimate@res);
 fixpoint {
@@ -40,15 +44,19 @@ fixpoint {
   fork-coalesce(laplacian_estimate);
 }
 simpl!(laplacian_estimate);
-let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
-fork-tile[4, 1, false, false](par);
-fork-tile[8, 0, false, false](par);
-fork-interchange[1, 2](par);
-let split = fork-split(par);
-let body = split._1_laplacian_estimate.fj2;
-let laplacian_estimate_body = outline(body);
-fork-coalesce(laplacian_estimate, laplacian_estimate_body);
-simpl!(laplacian_estimate, laplacian_estimate_body);
+
+if !feature("seq") {
+  let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
+  fork-tile[4, 1, false, false](par);
+  fork-tile[8, 0, false, false](par);
+  fork-interchange[1, 2](par);
+  let split = fork-split(par);
+  let body = split._1_laplacian_estimate.fj2;
+  let laplacian_estimate_body = outline(body);
+  fork-coalesce(laplacian_estimate, laplacian_estimate_body);
+  simpl!(laplacian_estimate, laplacian_estimate_body);
+  laplacian_estimate = laplacian_estimate_body;
+}
 
 no-memset(zero_crossings@res);
 fixpoint {
@@ -57,15 +65,19 @@ fixpoint {
   fork-coalesce(zero_crossings);
 }
 simpl!(zero_crossings);
-let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
-fork-tile[4, 1, false, false](par);
-fork-tile[8, 0, false, false](par);
-fork-interchange[1, 2](par);
-let split = fork-split(par);
-let body = split._2_zero_crossings.fj2;
-let zero_crossings_body = outline(body);
-fork-coalesce(zero_crossings, zero_crossings_body);
-simpl!(zero_crossings, zero_crossings_body);
+
+if !feature("seq") {
+  let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
+  fork-tile[4, 1, false, false](par);
+  fork-tile[8, 0, false, false](par);
+  fork-interchange[1, 2](par);
+  let split = fork-split(par);
+  let body = split._2_zero_crossings.fj2;
+  let zero_crossings_body = outline(body);
+  fork-coalesce(zero_crossings, zero_crossings_body);
+  simpl!(zero_crossings, zero_crossings_body);
+  zero_crossings = zero_crossings_body;
+}
 
 no-memset(gradient@res);
 fixpoint {
@@ -84,17 +96,23 @@ fixpoint {
   fork-coalesce(max_gradient);
 }
 simpl!(max_gradient);
-fork-dim-merge(max_gradient);
-simpl!(max_gradient);
-fork-tile[32, 0, false, false](max_gradient);
-let split = fork-split(max_gradient);
-clean-monoid-reduces(max_gradient);
-let out = outline(split._4_max_gradient.fj1);
-simpl!(max_gradient, out);
-unforkify(out);
-let out = fork-fission[split._4_max_gradient.fj0](max_gradient);
-simpl!(max_gradient);
-unforkify(out._4_max_gradient.fj_bottom);
+
+if !feature("seq") {
+  fork-dim-merge(max_gradient);
+  simpl!(max_gradient);
+  fork-tile[32, 0, false, false](max_gradient);
+  let split = fork-split(max_gradient);
+  clean-monoid-reduces(max_gradient);
+  let out = outline(split._4_max_gradient.fj1);
+  simpl!(max_gradient, out);
+  unforkify(out);
+  let out = fork-fission[split._4_max_gradient.fj0](max_gradient);
+  simpl!(max_gradient);
+  unforkify(out._4_max_gradient.fj_bottom);
+} else {
+  fork-split(max_gradient);
+  unforkify(max_gradient);
+}
 
 no-memset(reject_zero_crossings@res);
 fixpoint {
@@ -104,18 +122,22 @@ fixpoint {
 }
 predication(reject_zero_crossings);
 simpl!(reject_zero_crossings);
-fork-tile[4, 1, false, false](reject_zero_crossings);
-fork-tile[8, 0, false, false](reject_zero_crossings);
-fork-interchange[1, 2](reject_zero_crossings);
-let split = fork-split(reject_zero_crossings);
-let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
-fork-coalesce(reject_zero_crossings, reject_zero_crossings_body);
-simpl!(reject_zero_crossings, reject_zero_crossings_body);
+
+if !feature("seq") {
+  fork-tile[4, 1, false, false](reject_zero_crossings);
+  fork-tile[8, 0, false, false](reject_zero_crossings);
+  fork-interchange[1, 2](reject_zero_crossings);
+  let split = fork-split(reject_zero_crossings);
+  let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
+  fork-coalesce(reject_zero_crossings, reject_zero_crossings_body);
+  simpl!(reject_zero_crossings, reject_zero_crossings_body);
+  reject_zero_crossings = reject_zero_crossings_body;
+}
 
 async-call(edge_detection@le, edge_detection@zc);
 
-fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body);
-unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body);
+fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
+unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
 
 simpl!(*);
 
diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml
index 25185e09..68c76e92 100644
--- a/juno_samples/rodinia/backprop/Cargo.toml
+++ b/juno_samples/rodinia/backprop/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/lib.rs"
 
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [build-dependencies]
 juno_build = { path = "../../../juno_build" }
@@ -31,4 +32,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
 
 [[bench]]
 name = "backprop_bench"
-harness = false
\ No newline at end of file
+harness = false
diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch
index 3c7f7d5f..4796f427 100644
--- a/juno_samples/rodinia/backprop/src/cpu.sch
+++ b/juno_samples/rodinia/backprop/src/cpu.sch
@@ -37,10 +37,12 @@ inline(backprop@forward_input, backprop@forward_hidden);
 let forward_input = outline(backprop@forward_input);
 let forward_hidden = outline(backprop@forward_hidden);
 
-fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
-let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
-let forward_input = outline(inner);
-inline(backprop@forward_input);
+if !feature("seq") {
+  fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
+  let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
+  forward_input = outline(inner);
+  inline(backprop@forward_input);
+}
 
 // The first call to adjust_weights has total loop dimensions of 1 * 17, so not
 // worth parallelizing (given that the body is trivial)
@@ -50,10 +52,12 @@ inline(backprop@adjust_hidden, backprop@adjust_input);
 let adjust_hidden = outline(backprop@adjust_hidden);
 let adjust_input = outline(backprop@adjust_input);
 
-fork-tile[16, 0, false, true](adjust_input);
-let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
-let adjust_input = outline(inner);
-inline(backprop@adjust_input);
+if !feature("seq") {
+  fork-tile[16, 0, false, true](adjust_input);
+  let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
+  adjust_input = outline(inner);
+  inline(backprop@adjust_input);
+}
 
 delete-uncalled(*);
 const-inline(*);
diff --git a/juno_samples/rodinia/bfs/Cargo.toml b/juno_samples/rodinia/bfs/Cargo.toml
index 34b6f5ce..46f8ade6 100644
--- a/juno_samples/rodinia/bfs/Cargo.toml
+++ b/juno_samples/rodinia/bfs/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/lib.rs"
 
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [build-dependencies]
 juno_build = { path = "../../../juno_build" }
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index e5100561..550dce31 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -40,23 +40,24 @@ simpl!(collect);
 parallel-fork(traverse, collect);
 parallel-reduce(traverse, collect);
 
-fork-tile[32, 0, false, true](traverse, collect);
-let (outer, inner) = fork-reshape[[1], [0]](traverse);
-let traverse_body = outline(inner);
-let (outer, inner) = fork-reshape[[1], [0]](collect);
-let collect_body = outline(inner);
-
-let init_body = init;
-// Following code seems to generate breaking RT code
-//fork-tile[32, 0, false, true](init);
-//let (outer, inner) = fork-reshape[[1], [0]](init);
-//let init_body = outline(inner);
-//inline(bfs@cost_init);
-
-inline(bfs@loop1, bfs@loop2);
+if !feature("seq") {
+  fork-tile[32, 0, false, true](traverse, collect);
+  let (outer, inner) = fork-reshape[[1], [0]](traverse);
+  traverse = outline(inner);
+  let (outer, inner) = fork-reshape[[1], [0]](collect);
+  collect = outline(inner);
+
+  // The following code seems to generate broken RT code, so it stays disabled
+  //fork-tile[32, 0, false, true](init);
+  //let (outer, inner) = fork-reshape[[1], [0]](init);
+  //init = outline(inner);
+  //inline(bfs@cost_init);
+
+  inline(bfs@loop1, bfs@loop2);
+}
 delete-uncalled(*);
 const-inline(*);
 
-unforkify(init_body, traverse_body, collect_body);
+unforkify(init, traverse, collect);
 simpl!(*);
 gcm(*);
diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml
index 6720b527..172573dd 100644
--- a/juno_samples/rodinia/cfd/Cargo.toml
+++ b/juno_samples/rodinia/cfd/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/lib.rs"
 
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [build-dependencies]
 juno_build = { path = "../../../juno_build" }
diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index 7a284a9a..13125961 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -27,29 +27,35 @@ fixpoint {
 simpl!(*);
 unforkify(compute_flux@inner_loop);
 
-fork-tile[32, 0, false, false](compute_step_factor);
-let split = fork-split(compute_step_factor);
-let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
-fork-coalesce(compute_step_factor, compute_step_factor_body);
-simpl!(compute_step_factor, compute_step_factor_body);
+if !feature("seq") {
+  fork-tile[32, 0, false, false](compute_step_factor);
+  let split = fork-split(compute_step_factor);
+  let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
+  fork-coalesce(compute_step_factor, compute_step_factor_body);
+  simpl!(compute_step_factor, compute_step_factor_body);
+  compute_step_factor = compute_step_factor_body;
 
-fork-tile[32, 0, false, false](compute_flux);
-let split = fork-split(compute_flux);
-let compute_flux_body = outline(split._6_compute_flux.fj1);
-fork-coalesce(compute_flux, compute_flux_body);
-simpl!(compute_flux, compute_flux_body);
+  fork-tile[32, 0, false, false](compute_flux);
+  let split = fork-split(compute_flux);
+  let compute_flux_body = outline(split._6_compute_flux.fj1);
+  fork-coalesce(compute_flux, compute_flux_body);
+  simpl!(compute_flux, compute_flux_body);
+  compute_flux = compute_flux_body;
 
-fork-tile[32, 0, false, false](time_step);
-let split = fork-split(time_step);
-let time_step_body = outline(split._7_time_step.fj1);
-fork-coalesce(time_step, time_step_body);
-simpl!(time_step, time_step_body);
+  fork-tile[32, 0, false, false](time_step);
+  let split = fork-split(time_step);
+  let time_step_body = outline(split._7_time_step.fj1);
+  fork-coalesce(time_step, time_step_body);
+  simpl!(time_step, time_step_body);
+  time_step = time_step_body;
 
-fork-tile[32, 0, false, false](copy_vars);
-let split = fork-split(copy_vars);
-let copy_vars_body = outline(split._8_copy_vars.fj1);
-fork-coalesce(copy_vars, copy_vars_body);
-simpl!(copy_vars, copy_vars_body);
+  fork-tile[32, 0, false, false](copy_vars);
+  let split = fork-split(copy_vars);
+  let copy_vars_body = outline(split._8_copy_vars.fj1);
+  fork-coalesce(copy_vars, copy_vars_body);
+  simpl!(copy_vars, copy_vars_body);
+  copy_vars = copy_vars_body;
+}
 
-unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body);
+unforkify(compute_step_factor, compute_flux, time_step, copy_vars);
 gcm(*);
diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
index 518c656d..858be5ba 100644
--- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
@@ -27,35 +27,42 @@ simpl!(*);
 no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
 unforkify(compute_flux@inner_loop);
 
-fork-tile[32, 0, false, false](compute_step_factor);
-let split = fork-split(compute_step_factor);
-let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
-fork-coalesce(compute_step_factor, compute_step_factor_body);
-simpl!(compute_step_factor, compute_step_factor_body);
+if !feature("seq") {
+  fork-tile[32, 0, false, false](compute_step_factor);
+  let split = fork-split(compute_step_factor);
+  let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
+  fork-coalesce(compute_step_factor, compute_step_factor_body);
+  simpl!(compute_step_factor, compute_step_factor_body);
+  compute_step_factor = compute_step_factor_body;
 
-fork-tile[32, 0, false, false](compute_flux_contributions);
-let split = fork-split(compute_flux_contributions);
-let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
-fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
-simpl!(compute_flux_contributions, compute_flux_contributions_body);
+  fork-tile[32, 0, false, false](compute_flux_contributions);
+  let split = fork-split(compute_flux_contributions);
+  let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
+  fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
+  simpl!(compute_flux_contributions, compute_flux_contributions_body);
+  compute_flux_contributions = compute_flux_contributions_body;
 
-fork-tile[32, 0, false, false](compute_flux);
-let split = fork-split(compute_flux);
-let compute_flux_body = outline(split._7_compute_flux.fj1);
-fork-coalesce(compute_flux, compute_flux_body);
-simpl!(compute_flux, compute_flux_body);
+  fork-tile[32, 0, false, false](compute_flux);
+  let split = fork-split(compute_flux);
+  let compute_flux_body = outline(split._7_compute_flux.fj1);
+  fork-coalesce(compute_flux, compute_flux_body);
+  simpl!(compute_flux, compute_flux_body);
+  compute_flux = compute_flux_body;
 
-fork-tile[32, 0, false, false](time_step);
-let split = fork-split(time_step);
-let time_step_body = outline(split._8_time_step.fj1);
-fork-coalesce(time_step, time_step_body);
-simpl!(time_step, time_step_body);
+  fork-tile[32, 0, false, false](time_step);
+  let split = fork-split(time_step);
+  let time_step_body = outline(split._8_time_step.fj1);
+  fork-coalesce(time_step, time_step_body);
+  simpl!(time_step, time_step_body);
+  time_step = time_step_body;
 
-fork-tile[32, 0, false, false](copy_vars);
-let split = fork-split(copy_vars);
-let copy_vars_body = outline(split._9_copy_vars.fj1);
-fork-coalesce(copy_vars, copy_vars_body);
-simpl!(copy_vars, copy_vars_body);
+  fork-tile[32, 0, false, false](copy_vars);
+  let split = fork-split(copy_vars);
+  let copy_vars_body = outline(split._9_copy_vars.fj1);
+  fork-coalesce(copy_vars, copy_vars_body);
+  simpl!(copy_vars, copy_vars_body);
+  copy_vars = copy_vars_body;
+}
 
-unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body);
+unforkify(compute_step_factor, compute_flux_contributions, compute_flux, time_step, copy_vars);
 gcm(*);
diff --git a/juno_samples/rodinia/srad/Cargo.toml b/juno_samples/rodinia/srad/Cargo.toml
index facf8c3b..783f3327 100644
--- a/juno_samples/rodinia/srad/Cargo.toml
+++ b/juno_samples/rodinia/srad/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/lib.rs"
 
 [features]
 cuda = ["juno_build/cuda", "hercules_rt/cuda"]
+seq = []
 
 [build-dependencies]
 juno_build = { path = "../../../juno_build" }
diff --git a/juno_samples/rodinia/srad/src/cpu.sch b/juno_samples/rodinia/srad/src/cpu.sch
index 8917f03d..8fa22aaa 100644
--- a/juno_samples/rodinia/srad/src/cpu.sch
+++ b/juno_samples/rodinia/srad/src/cpu.sch
@@ -35,20 +35,24 @@ simpl!(*);
 slf(*);
 simpl!(*);
 
-fork-tile[32, 0, false, false](loop2);
-let split = fork-split(loop2);
-let loop2_body = outline(split.srad_1.fj1);
-simpl!(loop2, loop2_body);
+if !feature("seq") {
+  fork-tile[32, 0, false, false](loop2);
+  let split = fork-split(loop2);
+  let loop2_body = outline(split.srad_1.fj1);
+  simpl!(loop2, loop2_body);
+  loop2 = loop2_body;
 
-fork-tile[32, 0, false, false](loop3);
-let split = fork-split(loop3);
-let loop3_body = outline(split.srad_2.fj1);
-simpl!(loop3, loop3_body);
+  fork-tile[32, 0, false, false](loop3);
+  let split = fork-split(loop3);
+  let loop3_body = outline(split.srad_2.fj1);
+  simpl!(loop3, loop3_body);
+  loop3 = loop3_body;
 
-inline(srad@loop2, srad@loop3);
-delete-uncalled(*);
+  inline(srad@loop2, srad@loop3);
+  delete-uncalled(*);
+}
 
-fork-split(extract, compress, loop1, loop2_body, loop3_body);
-unforkify(extract, compress, loop1, loop2_body, loop3_body);
+fork-split(extract, compress, loop1, loop2, loop3);
+unforkify(extract, compress, loop1, loop2, loop3);
 
 gcm(*);
-- 
GitLab