Skip to content
Snippets Groups Projects
Commit 209b1c4d authored by rarbore2
Browse files

Ablation schedules

parent 123b595e
No related branches found
No related tags found
1 merge request: !226 "Ablation schedules"
...@@ -313,6 +313,11 @@ pub fn unforkify( ...@@ -313,6 +313,11 @@ pub fn unforkify(
edit = edit.delete_node(*reduce)?; edit = edit.delete_node(*reduce)?;
} }
edit.sub_edit(fork, proj_exit_id);
edit.sub_edit(fork, proj_back_id);
edit.sub_edit(fork, neq_id);
edit.sub_edit(fork, add_id);
edit = edit.delete_node(fork)?; edit = edit.delete_node(fork)?;
edit = edit.delete_node(join)?; edit = edit.delete_node(join)?;
for tid in tids { for tid in tids {
......
...@@ -14,6 +14,7 @@ path = "src/lib.rs" ...@@ -14,6 +14,7 @@ path = "src/lib.rs"
[features] [features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = [] seq = []
dont_fuse_gamut = []
[build-dependencies] [build-dependencies]
juno_build = { path = "../../juno_build" } juno_build = { path = "../../juno_build" }
......
...@@ -105,17 +105,20 @@ fixpoint { ...@@ -105,17 +105,20 @@ fixpoint {
fork-coalesce(fuse4); fork-coalesce(fuse4);
} }
simpl!(fuse4); simpl!(fuse4);
fork-unroll(fuse4@channel_loop);
simpl!(fuse4); if !feature("dont_fuse_gamut") {
fixpoint { fork-unroll(fuse4@channel_loop);
fork-fusion(fuse4@channel_loop); simpl!(fuse4);
fixpoint {
fork-fusion(fuse4@channel_loop);
}
simpl!(fuse4);
array-slf(fuse4);
simpl!(fuse4);
} }
simpl!(fuse4);
array-slf(fuse4);
simpl!(fuse4);
if !feature("seq") { if !feature("seq") {
let par = fuse4@image_loop \ fuse4@channel_loop; let par = fuse4@image_loop \ fuse4@channel_loop \ fuse4@cp_loop;
fork-tile[4, 1, false, false](par); fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par); fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par); fork-interchange[1, 2](par);
......
...@@ -109,18 +109,25 @@ fixpoint { ...@@ -109,18 +109,25 @@ fixpoint {
fork-coalesce(fuse4); fork-coalesce(fuse4);
} }
simpl!(fuse4); simpl!(fuse4);
fork-unroll(fuse4@channel_loop);
simpl!(fuse4); if !feature("dont_fuse_gamut") {
fixpoint { fork-unroll(fuse4@channel_loop);
fork-fusion(fuse4@channel_loop); simpl!(fuse4);
fixpoint {
fork-fusion(fuse4@channel_loop);
}
simpl!(fuse4);
} }
simpl!(fuse4);
array-slf(fuse4); array-slf(fuse4);
simpl!(fuse4); simpl!(fuse4);
fork-tile[2, 0, false, true](fuse4@channel_loop); unforkify(fuse4@channel_loop | fuse4@cp_loop);
let out = fork-split(fuse4@channel_loop);
fork-unroll(out.cava_3.fj1); fork-tile[4, 1, false, true](fuse4);
unforkify(fuse4@channel_loop); fork-tile[8, 0, false, true](fuse4);
fork-interchange[1, 2](fuse4);
let split = fork-split(fuse4);
fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
fork-coalesce(split.cava_3.fj2);
no-memset(fuse5@res1); no-memset(fuse5@res1);
no-memset(fuse5@res2); no-memset(fuse5@res2);
...@@ -133,13 +140,6 @@ simpl!(fuse5); ...@@ -133,13 +140,6 @@ simpl!(fuse5);
array-slf(fuse5); array-slf(fuse5);
simpl!(fuse5); simpl!(fuse5);
fork-tile[4, 1, false, true](fuse4);
fork-tile[8, 0, false, true](fuse4);
fork-interchange[1, 2](fuse4);
let split = fork-split(fuse4);
fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2);
fork-coalesce(split.cava_3.fj2);
delete-uncalled(*); delete-uncalled(*);
simpl!(*); simpl!(*);
......
...@@ -8,6 +8,7 @@ edition = "2021" ...@@ -8,6 +8,7 @@ edition = "2021"
opencv = ["dep:opencv"] opencv = ["dep:opencv"]
cuda = ["juno_build/cuda", "hercules_rt/cuda"] cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = [] seq = []
warp_tile = []
[[bin]] [[bin]]
name = "juno_edge_detection" name = "juno_edge_detection"
......
...@@ -17,6 +17,7 @@ juno_build::juno!("edge_detection"); ...@@ -17,6 +17,7 @@ juno_build::juno!("edge_detection");
fn edge_detection_bench(c: &mut Criterion) { fn edge_detection_bench(c: &mut Criterion) {
let mut group = c.benchmark_group("edge detection bench"); let mut group = c.benchmark_group("edge detection bench");
group.sample_size(10); group.sample_size(10);
group.measurement_time(std::time::Duration::from_secs(25));
let input = "examples/formula1_scaled.mp4"; let input = "examples/formula1_scaled.mp4";
......
...@@ -95,22 +95,54 @@ fixpoint { ...@@ -95,22 +95,54 @@ fixpoint {
fork-guard-elim(max_gradient); fork-guard-elim(max_gradient);
fork-coalesce(max_gradient); fork-coalesce(max_gradient);
} }
simpl!(max_gradient);
fork-dim-merge(max_gradient); if !feature("seq") {
simpl!(max_gradient); if !feature("warp_tile") {
fork-tile[32, 0, false, true](max_gradient); simpl!(max_gradient);
let out = fork-split(max_gradient); fork-dim-merge(max_gradient);
clean-monoid-reduces(max_gradient); simpl!(max_gradient);
simpl!(max_gradient); fork-tile[32, 0, false, true](max_gradient);
let fission = fork-fission[out._4_max_gradient.fj0](max_gradient); let out1 = fork-split(max_gradient);
simpl!(max_gradient); clean-monoid-reduces(max_gradient);
fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom); simpl!(max_gradient);
let out = fork-split(fission._4_max_gradient.fj_bottom); let fission = fork-fission[out1._4_max_gradient.fj0](max_gradient);
clean-monoid-reduces(max_gradient); simpl!(max_gradient);
simpl!(max_gradient); fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
let top = outline(fission._4_max_gradient.fj_top); let out2 = fork-split(fission._4_max_gradient.fj_bottom);
let bottom = outline(out._4_max_gradient.fj0); clean-monoid-reduces(max_gradient);
gpu(top, bottom); simpl!(max_gradient);
unforkify(out1._4_max_gradient.fj1);
unforkify(out2._4_max_gradient.fj1);
simpl!(max_gradient);
let top = outline(fission._4_max_gradient.fj_top);
let bottom = outline(out2._4_max_gradient.fj0);
gpu(top, bottom);
} else {
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, true](max_gradient);
let out = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
let fission = fork-fission[out._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom);
let out = fork-split(fission._4_max_gradient.fj_bottom);
clean-monoid-reduces(max_gradient);
simpl!(max_gradient);
let top = outline(fission._4_max_gradient.fj_top);
let bottom = outline(out._4_max_gradient.fj0);
gpu(top, bottom);
}
} else {
simpl!(max_gradient);
fork-split(max_gradient);
unforkify(max_gradient);
gpu(max_gradient);
}
ip-sroa(*); ip-sroa(*);
sroa(*); sroa(*);
simpl!(*); simpl!(*);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment