From 9210d7486597ff047263693fd0b817c16c57054f Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 24 Jan 2025 10:48:22 -0600 Subject: [PATCH 1/5] Fix assertion in GCM --- hercules_opt/src/gcm.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 5ea9485d..042100de 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -837,19 +837,16 @@ fn liveness_dataflow( liveness.insert(NodeID::new(bb_idx), vec![BTreeSet::new(); insts.len() + 1]); } let mut num_phis_reduces = vec![0; function.nodes.len()]; - let mut reducing = vec![false; function.nodes.len()]; + let mut has_phi = vec![false; function.nodes.len()]; + let mut has_reduce = vec![false; function.nodes.len()]; for (node_idx, bb) in bbs.0.iter().enumerate() { let node = &function.nodes[node_idx]; if node.is_phi() || node.is_reduce() { num_phis_reduces[bb.idx()] += 1; - // Phis and reduces can't be in the same basic block. - if node.is_reduce() { - assert!(num_phis_reduces[bb.idx()] == 0 || reducing[bb.idx()]); - reducing[bb.idx()] = true; - } else { - assert!(!reducing[bb.idx()]); - } } + has_phi[bb.idx()] = node.is_phi(); + has_reduce[bb.idx()] = node.is_reduce(); + assert!(!has_phi[bb.idx()] || !has_reduce[bb.idx()]); } let is_obj = |id: NodeID| !objects[&func_id].objects(id).is_empty(); @@ -861,7 +858,7 @@ fn liveness_dataflow( let last_pt = bbs.1[bb.idx()].len(); let old_value = &liveness[&bb][last_pt]; let mut new_value = BTreeSet::new(); - for succ in control_subgraph.succs(*bb).chain(if reducing[bb.idx()] { + for succ in control_subgraph.succs(*bb).chain(if has_reduce[bb.idx()] { Either::Left(once(*bb)) } else { Either::Right(empty()) -- GitLab From 63bfb720a5e485d36d6216b52ae8439362cdde0f Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 24 Jan 2025 11:21:34 -0600 Subject: [PATCH 2/5] Fix bug in loop tree --- hercules_ir/src/loops.rs | 29 +++++++++++++++++++---------- hercules_opt/src/gcm.rs | 1 - 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs index 13e935e0..1d706c78 100644 --- a/hercules_ir/src/loops.rs +++ b/hercules_ir/src/loops.rs @@ -152,16 +152,7 @@ pub fn loops( }) .collect(); - // Step 6: compute the inverse loop map - this maps control nodes to which - // loop they are in (keyed by header), if they are in one. - let mut inverse_loops = HashMap::new(); - for (header, (contents, _)) in loops.iter() { - for idx in contents.iter_ones() { - inverse_loops.insert(NodeID::new(idx), *header); - } - } - - // Step 7: compute loop tree nesting. + // Step 6: compute loop tree nesting. let mut nesting = HashMap::new(); let mut worklist: VecDeque<NodeID> = loops.keys().map(|id| *id).collect(); while let Some(header) = worklist.pop_front() { @@ -175,6 +166,24 @@ pub fn loops( } } + // Step 7: compute the inverse loop map - this maps control nodes to which + // loop they are in (identified by header), if they are in one. Pick the + // most nested loop as the loop they are in. + let mut inverse_loops = HashMap::new(); + for (header, (contents, _)) in loops.iter() { + for idx in contents.iter_ones() { + let id = NodeID::new(idx); + if let Some(old_header) = inverse_loops.get(&id) + && nesting[old_header] > nesting[header] + { + // If the inserted header is more deeply nested, don't do anything. + assert!(nesting[old_header] != nesting[header] || old_header == header); + } else { + inverse_loops.insert(id, *header); + } + } + } + LoopTree { root, loops, diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 042100de..1323d5a0 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -5,7 +5,6 @@ use bitvec::prelude::*; use either::Either; use union_find::{QuickFindUf, UnionBySize, UnionFind}; -use hercules_cg::*; use hercules_ir::*; use crate::*; -- GitLab From 4a8e4118edb1cad44e0872c27726d65727b43edb Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 24 Jan 2025 11:47:23 -0600 Subject: [PATCH 3/5] hm --- hercules_samples/dot/Cargo.toml | 3 +++ hercules_samples/dot/build.rs | 2 ++ hercules_samples/dot/src/cpu.sch | 12 ++++++++++++ hercules_samples/dot/src/gpu.sch | 13 +++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 hercules_samples/dot/src/cpu.sch create mode 100644 hercules_samples/dot/src/gpu.sch diff --git a/hercules_samples/dot/Cargo.toml b/hercules_samples/dot/Cargo.toml index 69cd39e3..99a48115 100644 --- a/hercules_samples/dot/Cargo.toml +++ b/hercules_samples/dot/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" +[features] +cuda = ["hercules_rt/cuda"] + [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 2a239bc6..8657fdc1 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -4,6 +4,8 @@ fn main() { JunoCompiler::new() .ir_in_src("dot.hir") .unwrap() + .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/dot/src/cpu.sch b/hercules_samples/dot/src/cpu.sch new file mode 100644 index 00000000..58a7266d --- /dev/null +++ b/hercules_samples/dot/src/cpu.sch @@ -0,0 +1,12 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +unforkify(*); +dce(*); + +gcm(*); diff --git a/hercules_samples/dot/src/gpu.sch b/hercules_samples/dot/src/gpu.sch new file mode 100644 index 00000000..956eb996 --- /dev/null +++ b/hercules_samples/dot/src/gpu.sch @@ -0,0 +1,13 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); +gpu(*); +host(dot); + +ip-sroa(*); +sroa(*); +dce(*); + +gcm(*); -- GitLab From 367e853fb29b32a243a718289081d8b58c3550d7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 24 Jan 2025 12:42:05 -0600 Subject: [PATCH 4/5] comment out gpu schedule for now --- hercules_samples/dot/build.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 8657fdc1..4cfd2a87 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -4,7 +4,8 @@ fn main() { JunoCompiler::new() .ir_in_src("dot.hir") .unwrap() - .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src("cpu.sch") .unwrap() .build() .unwrap(); -- GitLab From 19de48e970e4feaeed996efe7d02e112979f7dc0 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 24 Jan 2025 12:55:23 -0600 Subject: [PATCH 5/5] add gpu schedule for matmul, color functions in dot --- hercules_ir/src/dot.rs | 8 +++++++- hercules_samples/matmul/Cargo.toml | 3 +++ hercules_samples/matmul/build.rs | 3 +++ hercules_samples/matmul/src/cpu.sch | 14 ++++++++++++++ hercules_samples/matmul/src/gpu.sch | 15 +++++++++++++++ 5 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 hercules_samples/matmul/src/cpu.sch create mode 100644 hercules_samples/matmul/src/gpu.sch diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 9dd2cb1e..22cd0beb 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -219,7 +219,13 @@ fn write_subgraph_header<W: Write>( } else { write!(w, "label=\"{}\"\n", function.name)?; } - write!(w, "bgcolor=ivory4\n")?; + let color = match function.device { + Some(Device::LLVM) => "paleturquoise1", + Some(Device::CUDA) => "darkseagreen1", + Some(Device::AsyncRust) => "peachpuff1", + None => "ivory2", + }; + write!(w, "bgcolor={}\n", color)?; write!(w, "cluster=true\n")?; Ok(()) } diff --git a/hercules_samples/matmul/Cargo.toml b/hercules_samples/matmul/Cargo.toml index 9066c153..89e46dd6 100644 --- a/hercules_samples/matmul/Cargo.toml +++ b/hercules_samples/matmul/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" +[features] +cuda = ["hercules_rt/cuda"] + [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs index 08478dea..f895af86 100644 --- a/hercules_samples/matmul/build.rs +++ b/hercules_samples/matmul/build.rs @@ -4,6 +4,9 @@ fn main() { JunoCompiler::new() .ir_in_src("matmul.hir") .unwrap() + //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/matmul/src/cpu.sch b/hercules_samples/matmul/src/cpu.sch new file mode 100644 index 00000000..42dda6e3 --- /dev/null +++ b/hercules_samples/matmul/src/cpu.sch @@ -0,0 +1,14 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); + +gcm(*); diff --git a/hercules_samples/matmul/src/gpu.sch b/hercules_samples/matmul/src/gpu.sch new file mode 100644 index 00000000..9067a190 --- /dev/null +++ b/hercules_samples/matmul/src/gpu.sch @@ -0,0 +1,15 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); +gpu(*); +host(matmul); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); + +gcm(*); +xdot[true](*); -- GitLab