Skip to content
Snippets Groups Projects
Commit 3ce50c43 authored by Aaron Councilman
Browse files

Add sequential feature to benchmarks

parent 94950efe
No related branches found
No related tags found
1 merge request: !219 "Sequential schedules"
Pipeline #202052 passed
Showing with 194 additions and 140 deletions
...@@ -13,6 +13,7 @@ path = "src/lib.rs" ...@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../juno_build" } juno_build = { path = "../../juno_build" }
...@@ -30,4 +31,4 @@ criterion = { version = "0.5", features = ["html_reports"] } ...@@ -30,4 +31,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
[[bench]] [[bench]]
name = "cava_bench" name = "cava_bench"
harness = false harness = false
\ No newline at end of file
...@@ -113,14 +113,18 @@ fixpoint { ...@@ -113,14 +113,18 @@ fixpoint {
simpl!(fuse4); simpl!(fuse4);
array-slf(fuse4); array-slf(fuse4);
simpl!(fuse4); simpl!(fuse4);
let par = fuse4@image_loop \ fuse4@channel_loop;
fork-tile[4, 1, false, false](par); if !feature("seq") {
fork-tile[8, 0, false, false](par); let par = fuse4@image_loop \ fuse4@channel_loop;
fork-interchange[1, 2](par); fork-tile[4, 1, false, false](par);
let split = fork-split(par); fork-tile[8, 0, false, false](par);
let fuse4_body = outline(split.cava_3.fj2); fork-interchange[1, 2](par);
fork-coalesce(fuse4, fuse4_body); let split = fork-split(par);
simpl!(fuse4, fuse4_body); let fuse4_body = outline(split.cava_3.fj2);
fork-coalesce(fuse4, fuse4_body);
simpl!(fuse4, fuse4_body);
fuse4 = fuse4_body;
}
no-memset(fuse5@res1); no-memset(fuse5@res1);
no-memset(fuse5@res2); no-memset(fuse5@res2);
...@@ -136,8 +140,8 @@ simpl!(fuse5); ...@@ -136,8 +140,8 @@ simpl!(fuse5);
delete-uncalled(*); delete-uncalled(*);
simpl!(*); simpl!(*);
fork-split(fuse1, fuse2, fuse3, fuse4_body, fuse5); fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
unforkify(fuse1, fuse2, fuse3, fuse4_body, fuse5); unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
simpl!(*); simpl!(*);
......
...@@ -7,6 +7,7 @@ edition = "2021" ...@@ -7,6 +7,7 @@ edition = "2021"
[features] [features]
opencv = ["dep:opencv"] opencv = ["dep:opencv"]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[[bin]] [[bin]]
name = "juno_edge_detection" name = "juno_edge_detection"
......
...@@ -24,14 +24,18 @@ predication(gaussian_smoothing); ...@@ -24,14 +24,18 @@ predication(gaussian_smoothing);
simpl!(gaussian_smoothing); simpl!(gaussian_smoothing);
predication(gaussian_smoothing); predication(gaussian_smoothing);
simpl!(gaussian_smoothing); simpl!(gaussian_smoothing);
let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
fork-tile[4, 1, false, false](par); if !feature("seq") {
fork-tile[8, 0, false, false](par); let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
fork-interchange[1, 2](par); fork-tile[4, 1, false, false](par);
let split = fork-split(par); fork-tile[8, 0, false, false](par);
let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2); fork-interchange[1, 2](par);
fork-coalesce(gaussian_smoothing, gaussian_smoothing_body); let split = fork-split(par);
simpl!(gaussian_smoothing, gaussian_smoothing_body); let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
simpl!(gaussian_smoothing, gaussian_smoothing_body);
gaussian_smoothing = gaussian_smoothing_body;
}
no-memset(laplacian_estimate@res); no-memset(laplacian_estimate@res);
fixpoint { fixpoint {
...@@ -40,15 +44,19 @@ fixpoint { ...@@ -40,15 +44,19 @@ fixpoint {
fork-coalesce(laplacian_estimate); fork-coalesce(laplacian_estimate);
} }
simpl!(laplacian_estimate); simpl!(laplacian_estimate);
let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
fork-tile[4, 1, false, false](par); if !feature("seq") {
fork-tile[8, 0, false, false](par); let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
fork-interchange[1, 2](par); fork-tile[4, 1, false, false](par);
let split = fork-split(par); fork-tile[8, 0, false, false](par);
let body = split._1_laplacian_estimate.fj2; fork-interchange[1, 2](par);
let laplacian_estimate_body = outline(body); let split = fork-split(par);
fork-coalesce(laplacian_estimate, laplacian_estimate_body); let body = split._1_laplacian_estimate.fj2;
simpl!(laplacian_estimate, laplacian_estimate_body); let laplacian_estimate_body = outline(body);
fork-coalesce(laplacian_estimate, laplacian_estimate_body);
simpl!(laplacian_estimate, laplacian_estimate_body);
laplacian_estimate = laplacian_estimate_body;
}
no-memset(zero_crossings@res); no-memset(zero_crossings@res);
fixpoint { fixpoint {
...@@ -57,15 +65,19 @@ fixpoint { ...@@ -57,15 +65,19 @@ fixpoint {
fork-coalesce(zero_crossings); fork-coalesce(zero_crossings);
} }
simpl!(zero_crossings); simpl!(zero_crossings);
let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
fork-tile[4, 1, false, false](par); if !feature("seq") {
fork-tile[8, 0, false, false](par); let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
fork-interchange[1, 2](par); fork-tile[4, 1, false, false](par);
let split = fork-split(par); fork-tile[8, 0, false, false](par);
let body = split._2_zero_crossings.fj2; fork-interchange[1, 2](par);
let zero_crossings_body = outline(body); let split = fork-split(par);
fork-coalesce(zero_crossings, zero_crossings_body); let body = split._2_zero_crossings.fj2;
simpl!(zero_crossings, zero_crossings_body); let zero_crossings_body = outline(body);
fork-coalesce(zero_crossings, zero_crossings_body);
simpl!(zero_crossings, zero_crossings_body);
zero_crossings = zero_crossings_body;
}
no-memset(gradient@res); no-memset(gradient@res);
fixpoint { fixpoint {
...@@ -84,17 +96,23 @@ fixpoint { ...@@ -84,17 +96,23 @@ fixpoint {
fork-coalesce(max_gradient); fork-coalesce(max_gradient);
} }
simpl!(max_gradient); simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient); if !feature("seq") {
fork-tile[32, 0, false, false](max_gradient); fork-dim-merge(max_gradient);
let split = fork-split(max_gradient); simpl!(max_gradient);
clean-monoid-reduces(max_gradient); fork-tile[32, 0, false, false](max_gradient);
let out = outline(split._4_max_gradient.fj1); let split = fork-split(max_gradient);
simpl!(max_gradient, out); clean-monoid-reduces(max_gradient);
unforkify(out); let out = outline(split._4_max_gradient.fj1);
let out = fork-fission[split._4_max_gradient.fj0](max_gradient); simpl!(max_gradient, out);
simpl!(max_gradient); unforkify(out);
unforkify(out._4_max_gradient.fj_bottom); let out = fork-fission[split._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
unforkify(out._4_max_gradient.fj_bottom);
} else {
fork-split(max_gradient);
unforkify(max_gradient);
}
no-memset(reject_zero_crossings@res); no-memset(reject_zero_crossings@res);
fixpoint { fixpoint {
...@@ -104,18 +122,22 @@ fixpoint { ...@@ -104,18 +122,22 @@ fixpoint {
} }
predication(reject_zero_crossings); predication(reject_zero_crossings);
simpl!(reject_zero_crossings); simpl!(reject_zero_crossings);
fork-tile[4, 1, false, false](reject_zero_crossings);
fork-tile[8, 0, false, false](reject_zero_crossings); if !feature("seq") {
fork-interchange[1, 2](reject_zero_crossings); fork-tile[4, 1, false, false](reject_zero_crossings);
let split = fork-split(reject_zero_crossings); fork-tile[8, 0, false, false](reject_zero_crossings);
let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2); fork-interchange[1, 2](reject_zero_crossings);
fork-coalesce(reject_zero_crossings, reject_zero_crossings_body); let split = fork-split(reject_zero_crossings);
simpl!(reject_zero_crossings, reject_zero_crossings_body); let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
fork-coalesce(reject_zero_crossings, reject_zero_crossings_body);
simpl!(reject_zero_crossings, reject_zero_crossings_body);
reject_zero_crossings = reject_zero_crossings_body;
}
async-call(edge_detection@le, edge_detection@zc); async-call(edge_detection@le, edge_detection@zc);
fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body); fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body); unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
simpl!(*); simpl!(*);
......
...@@ -13,6 +13,7 @@ path = "src/lib.rs" ...@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../../juno_build" } juno_build = { path = "../../../juno_build" }
...@@ -31,4 +32,4 @@ criterion = { version = "0.5", features = ["html_reports"] } ...@@ -31,4 +32,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
[[bench]] [[bench]]
name = "backprop_bench" name = "backprop_bench"
harness = false harness = false
\ No newline at end of file
...@@ -37,10 +37,12 @@ inline(backprop@forward_input, backprop@forward_hidden); ...@@ -37,10 +37,12 @@ inline(backprop@forward_input, backprop@forward_hidden);
let forward_input = outline(backprop@forward_input); let forward_input = outline(backprop@forward_input);
let forward_hidden = outline(backprop@forward_hidden); let forward_hidden = outline(backprop@forward_hidden);
fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop); if !feature("seq") {
let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop); fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
let forward_input = outline(inner); let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
inline(backprop@forward_input); forward_input = outline(inner);
inline(backprop@forward_input);
}
// The first call to adjust_weights has total loop dimensions of 1 * 17, so not // The first call to adjust_weights has total loop dimensions of 1 * 17, so not
// worth parallelizing (given that the body is trivial) // worth parallelizing (given that the body is trivial)
...@@ -50,10 +52,12 @@ inline(backprop@adjust_hidden, backprop@adjust_input); ...@@ -50,10 +52,12 @@ inline(backprop@adjust_hidden, backprop@adjust_input);
let adjust_hidden = outline(backprop@adjust_hidden); let adjust_hidden = outline(backprop@adjust_hidden);
let adjust_input = outline(backprop@adjust_input); let adjust_input = outline(backprop@adjust_input);
fork-tile[16, 0, false, true](adjust_input); if !feature("seq") {
let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input); fork-tile[16, 0, false, true](adjust_input);
let adjust_input = outline(inner); let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
inline(backprop@adjust_input); adjust_input = outline(inner);
inline(backprop@adjust_input);
}
delete-uncalled(*); delete-uncalled(*);
const-inline(*); const-inline(*);
......
...@@ -13,6 +13,7 @@ path = "src/lib.rs" ...@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../../juno_build" } juno_build = { path = "../../../juno_build" }
......
...@@ -40,23 +40,24 @@ simpl!(collect); ...@@ -40,23 +40,24 @@ simpl!(collect);
parallel-fork(traverse, collect); parallel-fork(traverse, collect);
parallel-reduce(traverse, collect); parallel-reduce(traverse, collect);
fork-tile[32, 0, false, true](traverse, collect); if !feature("seq") {
let (outer, inner) = fork-reshape[[1], [0]](traverse); fork-tile[32, 0, false, true](traverse, collect);
let traverse_body = outline(inner); let (outer, inner) = fork-reshape[[1], [0]](traverse);
let (outer, inner) = fork-reshape[[1], [0]](collect); traverse = outline(inner);
let collect_body = outline(inner); let (outer, inner) = fork-reshape[[1], [0]](collect);
collect = outline(inner);
let init_body = init;
// Following code seems to generate breaking RT code // Following code seems to generate breaking RT code
//fork-tile[32, 0, false, true](init); //fork-tile[32, 0, false, true](init);
//let (outer, inner) = fork-reshape[[1], [0]](init); //let (outer, inner) = fork-reshape[[1], [0]](init);
//let init_body = outline(inner); //init = outline(inner);
//inline(bfs@cost_init); //inline(bfs@cost_init);
inline(bfs@loop1, bfs@loop2); inline(bfs@loop1, bfs@loop2);
}
delete-uncalled(*); delete-uncalled(*);
const-inline(*); const-inline(*);
unforkify(init_body, traverse_body, collect_body); unforkify(init, traverse, collect);
simpl!(*); simpl!(*);
gcm(*); gcm(*);
...@@ -13,6 +13,7 @@ path = "src/lib.rs" ...@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../../juno_build" } juno_build = { path = "../../../juno_build" }
......
...@@ -27,29 +27,35 @@ fixpoint { ...@@ -27,29 +27,35 @@ fixpoint {
simpl!(*); simpl!(*);
unforkify(compute_flux@inner_loop); unforkify(compute_flux@inner_loop);
fork-tile[32, 0, false, false](compute_step_factor); if !feature("seq") {
let split = fork-split(compute_step_factor); fork-tile[32, 0, false, false](compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); let split = fork-split(compute_step_factor);
fork-coalesce(compute_step_factor, compute_step_factor_body); let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
simpl!(compute_step_factor, compute_step_factor_body); fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
compute_step_factor = compute_step_factor_body;
fork-tile[32, 0, false, false](compute_flux); fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux); let split = fork-split(compute_flux);
let compute_flux_body = outline(split._6_compute_flux.fj1); let compute_flux_body = outline(split._6_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body); fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body); simpl!(compute_flux, compute_flux_body);
compute_flux = compute_flux_body;
fork-tile[32, 0, false, false](time_step); fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step); let split = fork-split(time_step);
let time_step_body = outline(split._7_time_step.fj1); let time_step_body = outline(split._7_time_step.fj1);
fork-coalesce(time_step, time_step_body); fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body); simpl!(time_step, time_step_body);
time_step = time_step_body;
fork-tile[32, 0, false, false](copy_vars); fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars); let split = fork-split(copy_vars);
let copy_vars_body = outline(split._8_copy_vars.fj1); let copy_vars_body = outline(split._8_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body); fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body); simpl!(copy_vars, copy_vars_body);
copy_vars = copy_vars_body;
}
unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body); unforkify(compute_step_factor, compute_flux, time_step, copy_vars);
gcm(*); gcm(*);
...@@ -27,35 +27,42 @@ simpl!(*); ...@@ -27,35 +27,42 @@ simpl!(*);
no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res); no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
unforkify(compute_flux@inner_loop); unforkify(compute_flux@inner_loop);
fork-tile[32, 0, false, false](compute_step_factor); if !feature("seq") {
let split = fork-split(compute_step_factor); fork-tile[32, 0, false, false](compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1); let split = fork-split(compute_step_factor);
fork-coalesce(compute_step_factor, compute_step_factor_body); let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
simpl!(compute_step_factor, compute_step_factor_body); fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
compute_step_factor = compute_step_factor_body;
fork-tile[32, 0, false, false](compute_flux_contributions); fork-tile[32, 0, false, false](compute_flux_contributions);
let split = fork-split(compute_flux_contributions); let split = fork-split(compute_flux_contributions);
let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1); let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
fork-coalesce(compute_flux_contributions, compute_flux_contributions_body); fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
simpl!(compute_flux_contributions, compute_flux_contributions_body); simpl!(compute_flux_contributions, compute_flux_contributions_body);
compute_flux_contributions = compute_flux_contributions_body;
fork-tile[32, 0, false, false](compute_flux); fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux); let split = fork-split(compute_flux);
let compute_flux_body = outline(split._7_compute_flux.fj1); let compute_flux_body = outline(split._7_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body); fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body); simpl!(compute_flux, compute_flux_body);
compute_flux = compute_flux_body;
fork-tile[32, 0, false, false](time_step); fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step); let split = fork-split(time_step);
let time_step_body = outline(split._8_time_step.fj1); let time_step_body = outline(split._8_time_step.fj1);
fork-coalesce(time_step, time_step_body); fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body); simpl!(time_step, time_step_body);
time_step = time_step_body;
fork-tile[32, 0, false, false](copy_vars); fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars); let split = fork-split(copy_vars);
let copy_vars_body = outline(split._9_copy_vars.fj1); let copy_vars_body = outline(split._9_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body); fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body); simpl!(copy_vars, copy_vars_body);
copy_vars = copy_vars_body;
}
unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body); unforkify(compute_step_factor, compute_flux_contributions, compute_flux, time_step, copy_vars);
gcm(*); gcm(*);
...@@ -13,6 +13,7 @@ path = "src/lib.rs" ...@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../../juno_build" } juno_build = { path = "../../../juno_build" }
......
...@@ -35,20 +35,24 @@ simpl!(*); ...@@ -35,20 +35,24 @@ simpl!(*);
slf(*); slf(*);
simpl!(*); simpl!(*);
fork-tile[32, 0, false, false](loop2); if !feature("seq") {
let split = fork-split(loop2); fork-tile[32, 0, false, false](loop2);
let loop2_body = outline(split.srad_1.fj1); let split = fork-split(loop2);
simpl!(loop2, loop2_body); let loop2_body = outline(split.srad_1.fj1);
simpl!(loop2, loop2_body);
loop2 = loop2_body;
fork-tile[32, 0, false, false](loop3); fork-tile[32, 0, false, false](loop3);
let split = fork-split(loop3); let split = fork-split(loop3);
let loop3_body = outline(split.srad_2.fj1); let loop3_body = outline(split.srad_2.fj1);
simpl!(loop3, loop3_body); simpl!(loop3, loop3_body);
loop3 = loop3_body;
inline(srad@loop2, srad@loop3); inline(srad@loop2, srad@loop3);
delete-uncalled(*); delete-uncalled(*);
}
fork-split(extract, compress, loop1, loop2_body, loop3_body); fork-split(extract, compress, loop1, loop2, loop3);
unforkify(extract, compress, loop1, loop2_body, loop3_body); unforkify(extract, compress, loop1, loop2, loop3);
gcm(*); gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment