diff --git a/juno_samples/cava/Cargo.toml b/juno_samples/cava/Cargo.toml index 17e9a1d330ae08c0ae36dd2474ac29aa9ce76d7a..bdf144da56d0a99d46aad8beffdf4615b01790c9 100644 --- a/juno_samples/cava/Cargo.toml +++ b/juno_samples/cava/Cargo.toml @@ -13,6 +13,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [build-dependencies] juno_build = { path = "../../juno_build" } @@ -30,4 +31,4 @@ criterion = { version = "0.5", features = ["html_reports"] } [[bench]] name = "cava_bench" -harness = false \ No newline at end of file +harness = false diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 6fc8adbb1fff30856978ce201af09fd2e02d6298..ada2f552fb1f31412d3f9a0bfe5c27884d4d86e6 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -113,14 +113,18 @@ fixpoint { simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); -let par = fuse4@image_loop \ fuse4@channel_loop; -fork-tile[4, 1, false, false](par); -fork-tile[8, 0, false, false](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -let fuse4_body = outline(split.cava_3.fj2); -fork-coalesce(fuse4, fuse4_body); -simpl!(fuse4, fuse4_body); + +if !feature("seq") { + let par = fuse4@image_loop \ fuse4@channel_loop; + fork-tile[4, 1, false, false](par); + fork-tile[8, 0, false, false](par); + fork-interchange[1, 2](par); + let split = fork-split(par); + let fuse4_body = outline(split.cava_3.fj2); + fork-coalesce(fuse4, fuse4_body); + simpl!(fuse4, fuse4_body); + fuse4 = fuse4_body; +} no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -136,8 +140,8 @@ simpl!(fuse5); delete-uncalled(*); simpl!(*); -fork-split(fuse1, fuse2, fuse3, fuse4_body, fuse5); -unforkify(fuse1, fuse2, fuse3, fuse4_body, fuse5); +fork-split(fuse1, fuse2, fuse3, fuse4, fuse5); +unforkify(fuse1, fuse2, fuse3, fuse4, fuse5); simpl!(*); diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml index fa4ca1ff09ea0ebc404998ad6119e886d1d95b4c..8def75003c3728a7da6d257131b628daa5849597 100644 --- a/juno_samples/edge_detection/Cargo.toml +++ b/juno_samples/edge_detection/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [features] opencv = ["dep:opencv"] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [[bin]] name = "juno_edge_detection" diff --git a/juno_samples/edge_detection/src/cpu.sch b/juno_samples/edge_detection/src/cpu.sch index 3e1321c517b55f72e529e8a6df46d9d55bf59efb..64fee6b648c6fc0e17b39677353e442b4b37596e 100644 --- a/juno_samples/edge_detection/src/cpu.sch +++ b/juno_samples/edge_detection/src/cpu.sch @@ -24,14 +24,18 @@ predication(gaussian_smoothing); simpl!(gaussian_smoothing); predication(gaussian_smoothing); simpl!(gaussian_smoothing); -let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop; -fork-tile[4, 1, false, false](par); -fork-tile[8, 0, false, false](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); -fork-coalesce(gaussian_smoothing, gaussian_smoothing_body); -simpl!(gaussian_smoothing, gaussian_smoothing_body); + +if !feature("seq") { + let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop; + fork-tile[4, 1, false, false](par); + fork-tile[8, 0, false, false](par); + fork-interchange[1, 2](par); + let split = fork-split(par); + let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); + fork-coalesce(gaussian_smoothing, gaussian_smoothing_body); + simpl!(gaussian_smoothing, gaussian_smoothing_body); + gaussian_smoothing = gaussian_smoothing_body; +} no-memset(laplacian_estimate@res); fixpoint { @@ -40,15 +44,19 @@ fixpoint { fork-coalesce(laplacian_estimate); } simpl!(laplacian_estimate); -let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop; -fork-tile[4, 1, false, false](par); -fork-tile[8, 0, false, false](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -let body = split._1_laplacian_estimate.fj2; -let laplacian_estimate_body = outline(body); -fork-coalesce(laplacian_estimate, laplacian_estimate_body); -simpl!(laplacian_estimate, laplacian_estimate_body); + +if !feature("seq") { + let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop; + fork-tile[4, 1, false, false](par); + fork-tile[8, 0, false, false](par); + fork-interchange[1, 2](par); + let split = fork-split(par); + let body = split._1_laplacian_estimate.fj2; + let laplacian_estimate_body = outline(body); + fork-coalesce(laplacian_estimate, laplacian_estimate_body); + simpl!(laplacian_estimate, laplacian_estimate_body); + laplacian_estimate = laplacian_estimate_body; +} no-memset(zero_crossings@res); fixpoint { @@ -57,15 +65,19 @@ fixpoint { fork-coalesce(zero_crossings); } simpl!(zero_crossings); -let par = zero_crossings@image_loop \ zero_crossings@filter_loop; -fork-tile[4, 1, false, false](par); -fork-tile[8, 0, false, false](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -let body = split._2_zero_crossings.fj2; -let zero_crossings_body = outline(body); -fork-coalesce(zero_crossings, zero_crossings_body); -simpl!(zero_crossings, zero_crossings_body); + +if !feature("seq") { + let par = zero_crossings@image_loop \ zero_crossings@filter_loop; + fork-tile[4, 1, false, false](par); + fork-tile[8, 0, false, false](par); + fork-interchange[1, 2](par); + let split = fork-split(par); + let body = split._2_zero_crossings.fj2; + let zero_crossings_body = outline(body); + fork-coalesce(zero_crossings, zero_crossings_body); + simpl!(zero_crossings, zero_crossings_body); + zero_crossings = zero_crossings_body; +} no-memset(gradient@res); fixpoint { @@ -84,17 +96,23 @@ fixpoint { fork-coalesce(max_gradient); } simpl!(max_gradient); -fork-dim-merge(max_gradient); -simpl!(max_gradient); -fork-tile[32, 0, false, false](max_gradient); -let split = fork-split(max_gradient); -clean-monoid-reduces(max_gradient); -let out = outline(split._4_max_gradient.fj1); -simpl!(max_gradient, out); -unforkify(out); -let out = fork-fission[split._4_max_gradient.fj0](max_gradient); -simpl!(max_gradient); -unforkify(out._4_max_gradient.fj_bottom); + +if !feature("seq") { + fork-dim-merge(max_gradient); + simpl!(max_gradient); + fork-tile[32, 0, false, false](max_gradient); + let split = fork-split(max_gradient); + clean-monoid-reduces(max_gradient); + let out = outline(split._4_max_gradient.fj1); + simpl!(max_gradient, out); + unforkify(out); + let out = fork-fission[split._4_max_gradient.fj0](max_gradient); + simpl!(max_gradient); + unforkify(out._4_max_gradient.fj_bottom); +} else { + fork-split(max_gradient); + unforkify(max_gradient); +} no-memset(reject_zero_crossings@res); fixpoint { @@ -104,18 +122,22 @@ fixpoint { } predication(reject_zero_crossings); simpl!(reject_zero_crossings); -fork-tile[4, 1, false, false](reject_zero_crossings); -fork-tile[8, 0, false, false](reject_zero_crossings); -fork-interchange[1, 2](reject_zero_crossings); -let split = fork-split(reject_zero_crossings); -let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2); -fork-coalesce(reject_zero_crossings, reject_zero_crossings_body); -simpl!(reject_zero_crossings, reject_zero_crossings_body); + +if !feature("seq") { + fork-tile[4, 1, false, false](reject_zero_crossings); + fork-tile[8, 0, false, false](reject_zero_crossings); + fork-interchange[1, 2](reject_zero_crossings); + let split = fork-split(reject_zero_crossings); + let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2); + fork-coalesce(reject_zero_crossings, reject_zero_crossings_body); + simpl!(reject_zero_crossings, reject_zero_crossings_body); + reject_zero_crossings = reject_zero_crossings_body; +} async-call(edge_detection@le, edge_detection@zc); -fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body); -unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body); +fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings); +unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings); simpl!(*); diff --git a/juno_samples/rodinia/backprop/Cargo.toml b/juno_samples/rodinia/backprop/Cargo.toml index 25185e0944d171c748f4d5f8e10e9646e5cce9eb..68c76e92f8ba8ca6bd95aaa953b7f9c7a87b83a5 100644 --- a/juno_samples/rodinia/backprop/Cargo.toml +++ b/juno_samples/rodinia/backprop/Cargo.toml @@ -13,6 +13,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [build-dependencies] juno_build = { path = "../../../juno_build" } @@ -31,4 +32,4 @@ criterion = { version = "0.5", features = ["html_reports"] } [[bench]] name = "backprop_bench" -harness = false \ No newline at end of file +harness = false diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch index 3c7f7d5f2da8012968d5ccf31b2abff46318f2c0..4796f427aae2746f853e65e3f3d7589f63c2e7f2 100644 --- a/juno_samples/rodinia/backprop/src/cpu.sch +++ b/juno_samples/rodinia/backprop/src/cpu.sch @@ -37,10 +37,12 @@ inline(backprop@forward_input, backprop@forward_hidden); let forward_input = outline(backprop@forward_input); let forward_hidden = outline(backprop@forward_hidden); -fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop); -let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop); -let forward_input = outline(inner); -inline(backprop@forward_input); +if !feature("seq") { + fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop); + let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop); + forward_input = outline(inner); + inline(backprop@forward_input); +} // The first call to adjust_weights has total loop dimensions of 1 * 17, so not // worth parallelizing (given that the body is trivial) @@ -50,10 +52,12 @@ inline(backprop@adjust_hidden, backprop@adjust_input); let adjust_hidden = outline(backprop@adjust_hidden); let adjust_input = outline(backprop@adjust_input); -fork-tile[16, 0, false, true](adjust_input); -let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input); -let adjust_input = outline(inner); -inline(backprop@adjust_input); +if !feature("seq") { + fork-tile[16, 0, false, true](adjust_input); + let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input); + adjust_input = outline(inner); + inline(backprop@adjust_input); +} delete-uncalled(*); const-inline(*); diff --git a/juno_samples/rodinia/bfs/Cargo.toml b/juno_samples/rodinia/bfs/Cargo.toml index 34b6f5cefccc5a7f175ef0389c0ec0310e17079a..46f8ade6bf969a2892341a91629b404ef417fa56 100644 --- a/juno_samples/rodinia/bfs/Cargo.toml +++ b/juno_samples/rodinia/bfs/Cargo.toml @@ -13,6 +13,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [build-dependencies] juno_build = { path = "../../../juno_build" } diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch index 2bd762b2ce8d3821a2971d8c7ad849fd722e5c5c..ea6f0403c8f0824c0bcf27dc6dcd15649bcdb2ec 100644 --- a/juno_samples/rodinia/bfs/src/cpu.sch +++ b/juno_samples/rodinia/bfs/src/cpu.sch @@ -40,21 +40,24 @@ simpl!(collect); parallel-fork(traverse, collect); parallel-reduce(traverse, collect); -fork-tile[32, 0, false, true](traverse, collect); -let (outer, inner) = fork-reshape[[1], [0]](traverse); -let traverse_body = outline(inner); -let (outer, inner) = fork-reshape[[1], [0]](collect); -let collect_body = outline(inner); - -fork-tile[32, 0, false, true](init); -let (outer, inner) = fork-reshape[[1], [0]](init); -let init_body = outline(inner); - -inline(bfs@cost_init, bfs@loop1, bfs@loop2); +if !feature("seq") { + fork-tile[32, 0, false, true](traverse, collect); + let (outer, inner) = fork-reshape[[1], [0]](traverse); + traverse = outline(inner); + let (outer, inner) = fork-reshape[[1], [0]](collect); + collect = outline(inner); + + fork-tile[32, 0, false, true](init); + let (outer, inner) = fork-reshape[[1], [0]](init); + let init_body = outline(inner); + + inline(bfs@cost_init, bfs@loop1, bfs@loop2); + init = init_body; +} delete-uncalled(*); const-inline(*); simpl!(*); -unforkify(init_body, traverse_body, collect_body); +unforkify(init, traverse, collect); simpl!(*); -gcm(*); +gcm(*); \ No newline at end of file diff --git a/juno_samples/rodinia/cfd/Cargo.toml b/juno_samples/rodinia/cfd/Cargo.toml index 6720b5275381594a63f31571ccf6266ebe4e46f4..172573ddcf3edfc79aeeca3e2140ade2cbfddb33 100644 --- a/juno_samples/rodinia/cfd/Cargo.toml +++ b/juno_samples/rodinia/cfd/Cargo.toml @@ -13,6 +13,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [build-dependencies] juno_build = { path = "../../../juno_build" } diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index 7a284a9a3bed0c1800366c64ceeca20265d845a6..13125961275b8982b7fc7c589198bc83bd36c420 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -27,29 +27,35 @@ fixpoint { simpl!(*); unforkify(compute_flux@inner_loop); -fork-tile[32, 0, false, false](compute_step_factor); -let split = fork-split(compute_step_factor); -let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); -fork-coalesce(compute_step_factor, compute_step_factor_body); -simpl!(compute_step_factor, compute_step_factor_body); +if !feature("seq") { + fork-tile[32, 0, false, false](compute_step_factor); + let split = fork-split(compute_step_factor); + let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); + fork-coalesce(compute_step_factor, compute_step_factor_body); + simpl!(compute_step_factor, compute_step_factor_body); + compute_step_factor = compute_step_factor_body; -fork-tile[32, 0, false, false](compute_flux); -let split = fork-split(compute_flux); -let compute_flux_body = outline(split._6_compute_flux.fj1); -fork-coalesce(compute_flux, compute_flux_body); -simpl!(compute_flux, compute_flux_body); + fork-tile[32, 0, false, false](compute_flux); + let split = fork-split(compute_flux); + let compute_flux_body = outline(split._6_compute_flux.fj1); + fork-coalesce(compute_flux, compute_flux_body); + simpl!(compute_flux, compute_flux_body); + compute_flux = compute_flux_body; -fork-tile[32, 0, false, false](time_step); -let split = fork-split(time_step); -let time_step_body = outline(split._7_time_step.fj1); -fork-coalesce(time_step, time_step_body); -simpl!(time_step, time_step_body); + fork-tile[32, 0, false, false](time_step); + let split = fork-split(time_step); + let time_step_body = outline(split._7_time_step.fj1); + fork-coalesce(time_step, time_step_body); + simpl!(time_step, time_step_body); + time_step = time_step_body; -fork-tile[32, 0, false, false](copy_vars); -let split = fork-split(copy_vars); -let copy_vars_body = outline(split._8_copy_vars.fj1); -fork-coalesce(copy_vars, copy_vars_body); -simpl!(copy_vars, copy_vars_body); + fork-tile[32, 0, false, false](copy_vars); + let split = fork-split(copy_vars); + let copy_vars_body = outline(split._8_copy_vars.fj1); + fork-coalesce(copy_vars, copy_vars_body); + simpl!(copy_vars, copy_vars_body); + copy_vars = copy_vars_body; +} -unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body); +unforkify(compute_step_factor, compute_flux, time_step, copy_vars); gcm(*); diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch index 518c656d99652dd9813e6aa5c8e8e84388836d47..858be5baec53ea93e5658cfed6b9290cc4010c1f 100644 --- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch @@ -27,35 +27,42 @@ simpl!(*); no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res); unforkify(compute_flux@inner_loop); -fork-tile[32, 0, false, false](compute_step_factor); -let split = fork-split(compute_step_factor); -let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); -fork-coalesce(compute_step_factor, compute_step_factor_body); -simpl!(compute_step_factor, compute_step_factor_body); +if !feature("seq") { + fork-tile[32, 0, false, false](compute_step_factor); + let split = fork-split(compute_step_factor); + let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); + fork-coalesce(compute_step_factor, compute_step_factor_body); + simpl!(compute_step_factor, compute_step_factor_body); + compute_step_factor = compute_step_factor_body; -fork-tile[32, 0, false, false](compute_flux_contributions); -let split = fork-split(compute_flux_contributions); -let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1); -fork-coalesce(compute_flux_contributions, compute_flux_contributions_body); -simpl!(compute_flux_contributions, compute_flux_contributions_body); + fork-tile[32, 0, false, false](compute_flux_contributions); + let split = fork-split(compute_flux_contributions); + let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1); + fork-coalesce(compute_flux_contributions, compute_flux_contributions_body); + simpl!(compute_flux_contributions, compute_flux_contributions_body); + compute_flux_contributions = compute_flux_contributions_body; -fork-tile[32, 0, false, false](compute_flux); -let split = fork-split(compute_flux); -let compute_flux_body = outline(split._7_compute_flux.fj1); -fork-coalesce(compute_flux, compute_flux_body); -simpl!(compute_flux, compute_flux_body); + fork-tile[32, 0, false, false](compute_flux); + let split = fork-split(compute_flux); + let compute_flux_body = outline(split._7_compute_flux.fj1); + fork-coalesce(compute_flux, compute_flux_body); + simpl!(compute_flux, compute_flux_body); + compute_flux = compute_flux_body; -fork-tile[32, 0, false, false](time_step); -let split = fork-split(time_step); -let time_step_body = outline(split._8_time_step.fj1); -fork-coalesce(time_step, time_step_body); -simpl!(time_step, time_step_body); + fork-tile[32, 0, false, false](time_step); + let split = fork-split(time_step); + let time_step_body = outline(split._8_time_step.fj1); + fork-coalesce(time_step, time_step_body); + simpl!(time_step, time_step_body); + time_step = time_step_body; -fork-tile[32, 0, false, false](copy_vars); -let split = fork-split(copy_vars); -let copy_vars_body = outline(split._9_copy_vars.fj1); -fork-coalesce(copy_vars, copy_vars_body); -simpl!(copy_vars, copy_vars_body); + fork-tile[32, 0, false, false](copy_vars); + let split = fork-split(copy_vars); + let copy_vars_body = outline(split._9_copy_vars.fj1); + fork-coalesce(copy_vars, copy_vars_body); + simpl!(copy_vars, copy_vars_body); + copy_vars = copy_vars_body; +} -unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body); +unforkify(compute_step_factor, compute_flux_contributions, compute_flux, time_step, copy_vars); gcm(*); diff --git a/juno_samples/rodinia/srad/Cargo.toml b/juno_samples/rodinia/srad/Cargo.toml index facf8c3bc7c92fe0b77dd85900c3e53307d358e5..783f3327ccf38095d5dfdf9b6cd188d0d0c55e71 100644 --- a/juno_samples/rodinia/srad/Cargo.toml +++ b/juno_samples/rodinia/srad/Cargo.toml @@ -13,6 +13,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] +seq = [] [build-dependencies] juno_build = { path = "../../../juno_build" } diff --git a/juno_samples/rodinia/srad/src/cpu.sch b/juno_samples/rodinia/srad/src/cpu.sch index 8917f03dd0813519e7362ff3d3401ff436af0e77..8fa22aaa9f1213777439d153cef25cbf5b88ab8b 100644 --- a/juno_samples/rodinia/srad/src/cpu.sch +++ b/juno_samples/rodinia/srad/src/cpu.sch @@ -35,20 +35,24 @@ simpl!(*); slf(*); simpl!(*); -fork-tile[32, 0, false, false](loop2); -let split = fork-split(loop2); -let loop2_body = outline(split.srad_1.fj1); -simpl!(loop2, loop2_body); +if !feature("seq") { + fork-tile[32, 0, false, false](loop2); + let split = fork-split(loop2); + let loop2_body = outline(split.srad_1.fj1); + simpl!(loop2, loop2_body); + loop2 = loop2_body; -fork-tile[32, 0, false, false](loop3); -let split = fork-split(loop3); -let loop3_body = outline(split.srad_2.fj1); -simpl!(loop3, loop3_body); + fork-tile[32, 0, false, false](loop3); + let split = fork-split(loop3); + let loop3_body = outline(split.srad_2.fj1); + simpl!(loop3, loop3_body); + loop3 = loop3_body; -inline(srad@loop2, srad@loop3); -delete-uncalled(*); + inline(srad@loop2, srad@loop3); + delete-uncalled(*); +} -fork-split(extract, compress, loop1, loop2_body, loop3_body); -unforkify(extract, compress, loop1, loop2_body, loop3_body); +fork-split(extract, compress, loop1, loop2, loop3); +unforkify(extract, compress, loop1, loop2, loop3); gcm(*);