From 77bb4cf7b80d66f750383e4d30232b4fa688ee23 Mon Sep 17 00:00:00 2001 From: xavierrouth <xavierrouth@gmail.com> Date: Wed, 5 Mar 2025 22:05:06 -0600 Subject: [PATCH 1/5] cava gamut fusion debug --- hercules_opt/src/unforkify.rs | 6 ++ juno_samples/cava/Cargo.toml | 1 + juno_samples/cava/src/cpu.sch | 8 ++- juno_samples/cava/src/gpu.sch | 32 ++++++---- juno_samples/edge_detection/Cargo.toml | 1 + .../benches/edge_detection_bench.rs | 1 + juno_samples/edge_detection/src/gpu.sch | 64 ++++++++++++++----- 7 files changed, 83 insertions(+), 30 deletions(-) diff --git a/hercules_opt/src/unforkify.rs b/hercules_opt/src/unforkify.rs index 2d6cf7b3..2deb7165 100644 --- a/hercules_opt/src/unforkify.rs +++ b/hercules_opt/src/unforkify.rs @@ -313,6 +313,12 @@ pub fn unforkify( edit = edit.delete_node(*reduce)?; } + edit.sub_edit(fork, proj_exit_id); + edit.sub_edit(fork, proj_back_id); + edit.sub_edit(fork, neq_id); + edit.sub_edit(fork, add_id); + + edit = edit.delete_node(fork)?; edit = edit.delete_node(join)?; for tid in tids { diff --git a/juno_samples/cava/Cargo.toml b/juno_samples/cava/Cargo.toml index bdf144da..279a81f0 100644 --- a/juno_samples/cava/Cargo.toml +++ b/juno_samples/cava/Cargo.toml @@ -14,6 +14,7 @@ path = "src/lib.rs" [features] cuda = ["juno_build/cuda", "hercules_rt/cuda"] seq = [] +dont_fuse_gamut = [] [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index ada2f552..7d788c2f 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -111,11 +111,15 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); -array-slf(fuse4); -simpl!(fuse4); + +if !feature("dont_fuse_gamut") { + array-slf(fuse4); + simpl!(fuse4); +} if !feature("seq") { let par = fuse4@image_loop \ fuse4@channel_loop; + let par = par \ fuse4@cp_loop; fork-tile[4, 1, false, false](par); fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index 0ef466c0..daf52339 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -115,12 +115,25 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); -array-slf(fuse4); -simpl!(fuse4); -fork-tile[2, 0, false, true](fuse4@channel_loop); -let out = fork-split(fuse4@channel_loop); -fork-unroll(out.cava_3.fj1); -unforkify(fuse4@channel_loop); + +if !feature("dont_fuse_gamut") { + array-slf(fuse4); + simpl!(fuse4); + fork-tile[2, 0, false, true](fuse4@channel_loop); + let out = fork-split(fuse4@channel_loop); + fork-unroll(out.cava_3.fj1); + unforkify(fuse4@channel_loop); +} + +let par = fuse4@image_loop \ fuse4@channel_loop; +let par = par \ fuse4@cp_loop; + +fork-tile[4, 1, false, true](par); +fork-tile[8, 0, false, true](par); +fork-interchange[1, 2](par); +let split = fork-split(par); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -133,12 +146,7 @@ simpl!(fuse5); array-slf(fuse5); simpl!(fuse5); -fork-tile[4, 1, false, true](fuse4); -fork-tile[8, 0, false, true](fuse4); -fork-interchange[1, 2](fuse4); -let split = fork-split(fuse4); -fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); -fork-coalesce(split.cava_3.fj2); + delete-uncalled(*); simpl!(*); diff --git a/juno_samples/edge_detection/Cargo.toml b/juno_samples/edge_detection/Cargo.toml index 8def7500..6e53500a 100644 --- a/juno_samples/edge_detection/Cargo.toml +++ b/juno_samples/edge_detection/Cargo.toml @@ -8,6 +8,7 @@ edition = "2021" opencv = ["dep:opencv"] cuda = ["juno_build/cuda", "hercules_rt/cuda"] seq = [] +warp_tile = [] [[bin]] name = "juno_edge_detection" diff --git a/juno_samples/edge_detection/benches/edge_detection_bench.rs b/juno_samples/edge_detection/benches/edge_detection_bench.rs index 76035275..1fa08506 100644 --- a/juno_samples/edge_detection/benches/edge_detection_bench.rs +++ b/juno_samples/edge_detection/benches/edge_detection_bench.rs @@ -17,6 +17,7 @@ juno_build::juno!("edge_detection"); fn edge_detection_bench(c: &mut Criterion) { let mut group = c.benchmark_group("edge detection bench"); group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(25)); let input = "examples/formula1_scaled.mp4"; diff --git a/juno_samples/edge_detection/src/gpu.sch b/juno_samples/edge_detection/src/gpu.sch index 666f6cef..3a541dab 100644 --- a/juno_samples/edge_detection/src/gpu.sch +++ b/juno_samples/edge_detection/src/gpu.sch @@ -95,22 +95,54 @@ fixpoint { fork-guard-elim(max_gradient); fork-coalesce(max_gradient); } -simpl!(max_gradient); -fork-dim-merge(max_gradient); -simpl!(max_gradient); -fork-tile[32, 0, false, true](max_gradient); -let out = fork-split(max_gradient); -clean-monoid-reduces(max_gradient); -simpl!(max_gradient); -let fission = fork-fission[out._4_max_gradient.fj0](max_gradient); -simpl!(max_gradient); -fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom); -let out = fork-split(fission._4_max_gradient.fj_bottom); -clean-monoid-reduces(max_gradient); -simpl!(max_gradient); -let top = outline(fission._4_max_gradient.fj_top); -let bottom = outline(out._4_max_gradient.fj0); -gpu(top, bottom); + +if !feature("seq") { + if !feature("warp_tile") { + simpl!(max_gradient); + fork-dim-merge(max_gradient); + simpl!(max_gradient); + fork-tile[32, 0, false, true](max_gradient); + let out1 = fork-split(max_gradient); + clean-monoid-reduces(max_gradient); + simpl!(max_gradient); + let fission = fork-fission[out1._4_max_gradient.fj0](max_gradient); + simpl!(max_gradient); + fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom); + let out2 = fork-split(fission._4_max_gradient.fj_bottom); + clean-monoid-reduces(max_gradient); + simpl!(max_gradient); + unforkify(out1._4_max_gradient.fj1); + + unforkify(out2._4_max_gradient.fj1); + simpl!(max_gradient); + let top = outline(fission._4_max_gradient.fj_top); + let bottom = outline(out2._4_max_gradient.fj0); + gpu(top, bottom); + } else { + simpl!(max_gradient); + fork-dim-merge(max_gradient); + simpl!(max_gradient); + fork-tile[32, 0, false, true](max_gradient); + let out = fork-split(max_gradient); + clean-monoid-reduces(max_gradient); + simpl!(max_gradient); + let fission = fork-fission[out._4_max_gradient.fj0](max_gradient); + simpl!(max_gradient); + fork-tile[32, 0, false, true](fission._4_max_gradient.fj_bottom); + let out = fork-split(fission._4_max_gradient.fj_bottom); + clean-monoid-reduces(max_gradient); + simpl!(max_gradient); + let top = outline(fission._4_max_gradient.fj_top); + let bottom = outline(out._4_max_gradient.fj0); + gpu(top, bottom); + } +} else { + simpl!(max_gradient); + fork-split(max_gradient); + unforkify(max_gradient); + gpu(max_gradient); +} + ip-sroa(*); sroa(*); simpl!(*); -- GitLab From 4578b15e2ec553c6dc4beaefc55f69737bbbb9ed Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 5 Mar 2025 22:23:10 -0600 Subject: [PATCH 2/5] revert cava schedules --- juno_samples/cava/src/cpu.sch | 10 ++++------ juno_samples/cava/src/gpu.sch | 32 ++++++++++++-------------------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 7d788c2f..32e2b63b 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -111,15 +111,11 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); - -if !feature("dont_fuse_gamut") { - array-slf(fuse4); - simpl!(fuse4); -} +array-slf(fuse4); +simpl!(fuse4); if !feature("seq") { let par = fuse4@image_loop \ fuse4@channel_loop; - let par = par \ fuse4@cp_loop; fork-tile[4, 1, false, false](par); fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); @@ -128,6 +124,8 @@ if !feature("seq") { fork-coalesce(fuse4, fuse4_body); simpl!(fuse4, fuse4_body); fuse4 = fuse4_body; +} else { + fork-tile[6, 0, false, true](fuse4@channel_loop); } no-memset(fuse5@res1); diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index daf52339..0ef466c0 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -115,25 +115,12 @@ fixpoint { fork-fusion(fuse4@channel_loop); } simpl!(fuse4); - -if !feature("dont_fuse_gamut") { - array-slf(fuse4); - simpl!(fuse4); - fork-tile[2, 0, false, true](fuse4@channel_loop); - let out = fork-split(fuse4@channel_loop); - fork-unroll(out.cava_3.fj1); - unforkify(fuse4@channel_loop); -} - -let par = fuse4@image_loop \ fuse4@channel_loop; -let par = par \ fuse4@cp_loop; - -fork-tile[4, 1, false, true](par); -fork-tile[8, 0, false, true](par); -fork-interchange[1, 2](par); -let split = fork-split(par); -fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); -fork-coalesce(split.cava_3.fj2); +array-slf(fuse4); +simpl!(fuse4); +fork-tile[2, 0, false, true](fuse4@channel_loop); +let out = fork-split(fuse4@channel_loop); +fork-unroll(out.cava_3.fj1); +unforkify(fuse4@channel_loop); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -146,7 +133,12 @@ simpl!(fuse5); array-slf(fuse5); simpl!(fuse5); - +fork-tile[4, 1, false, true](fuse4); +fork-tile[8, 0, false, true](fuse4); +fork-interchange[1, 2](fuse4); +let split = fork-split(fuse4); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); delete-uncalled(*); simpl!(*); -- GitLab From 7031edf9b7c1d3c9d8982fc868b75d18afcc7751 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 5 Mar 2025 22:30:15 -0600 Subject: [PATCH 3/5] cava cpu dont_fuse_gamut --- juno_samples/cava/src/cpu.sch | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/juno_samples/cava/src/cpu.sch b/juno_samples/cava/src/cpu.sch index 32e2b63b..16beceeb 100644 --- a/juno_samples/cava/src/cpu.sch +++ b/juno_samples/cava/src/cpu.sch @@ -105,17 +105,20 @@ fixpoint { fork-coalesce(fuse4); } simpl!(fuse4); -fork-unroll(fuse4@channel_loop); -simpl!(fuse4); -fixpoint { - fork-fusion(fuse4@channel_loop); + +if !feature("dont_fuse_gamut") { + fork-unroll(fuse4@channel_loop); + simpl!(fuse4); + fixpoint { + fork-fusion(fuse4@channel_loop); + } + simpl!(fuse4); + array-slf(fuse4); + simpl!(fuse4); } -simpl!(fuse4); -array-slf(fuse4); -simpl!(fuse4); if !feature("seq") { - let par = fuse4@image_loop \ fuse4@channel_loop; + let par = fuse4@image_loop \ fuse4@channel_loop \ fuse4@cp_loop; fork-tile[4, 1, false, false](par); fork-tile[8, 0, false, false](par); fork-interchange[1, 2](par); -- GitLab From dfcb25fff16ef08a52c77ad01ff60e2c980cc93d Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 5 Mar 2025 22:37:33 -0600 Subject: [PATCH 4/5] cava gpu dont_fuse_gamut --- juno_samples/cava/src/gpu.sch | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch index 0ef466c0..0981804f 100644 --- a/juno_samples/cava/src/gpu.sch +++ b/juno_samples/cava/src/gpu.sch @@ -109,18 +109,25 @@ fixpoint { fork-coalesce(fuse4); } simpl!(fuse4); -fork-unroll(fuse4@channel_loop); -simpl!(fuse4); -fixpoint { - fork-fusion(fuse4@channel_loop); + +if !feature("dont_fuse_gamut") { + fork-unroll(fuse4@channel_loop); + simpl!(fuse4); + fixpoint { + fork-fusion(fuse4@channel_loop); + } + simpl!(fuse4); } -simpl!(fuse4); array-slf(fuse4); simpl!(fuse4); -fork-tile[2, 0, false, true](fuse4@channel_loop); -let out = fork-split(fuse4@channel_loop); -fork-unroll(out.cava_3.fj1); -unforkify(fuse4@channel_loop); +unforkify(fuse4@channel_loop | fuse4@cp_loop); + +fork-tile[4, 1, false, true](fuse4); +fork-tile[8, 0, false, true](fuse4); +fork-interchange[1, 2](fuse4); +let split = fork-split(fuse4); +fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); +fork-coalesce(split.cava_3.fj2); no-memset(fuse5@res1); no-memset(fuse5@res2); @@ -133,13 +140,6 @@ simpl!(fuse5); array-slf(fuse5); simpl!(fuse5); -fork-tile[4, 1, false, true](fuse4); -fork-tile[8, 0, false, true](fuse4); -fork-interchange[1, 2](fuse4); -let split = fork-split(fuse4); -fork-coalesce(split.cava_3.fj0 \ split.cava_3.fj2); -fork-coalesce(split.cava_3.fj2); - delete-uncalled(*); simpl!(*); -- GitLab From fcfb66b6dcad26c8772aa95dcc157845f88a9e3a Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 5 Mar 2025 22:45:12 -0600 Subject: [PATCH 5/5] um what --- hercules_opt/src/unforkify.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/hercules_opt/src/unforkify.rs b/hercules_opt/src/unforkify.rs index 2deb7165..cafe5ed1 100644 --- a/hercules_opt/src/unforkify.rs +++ b/hercules_opt/src/unforkify.rs @@ -318,7 +318,6 @@ pub fn unforkify( edit.sub_edit(fork, neq_id); edit.sub_edit(fork, add_id); - edit = edit.delete_node(fork)?; edit = edit.delete_node(join)?; for tid in tids { -- GitLab