diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 9dd2cb1e4c0ae76ac2d60bb9e638d95cca646016..22cd0beb7e5b6946c1116422441a0777f21f064b 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -219,7 +219,13 @@ fn write_subgraph_header<W: Write>( } else { write!(w, "label=\"{}\"\n", function.name)?; } - write!(w, "bgcolor=ivory4\n")?; + let color = match function.device { + Some(Device::LLVM) => "paleturquoise1", + Some(Device::CUDA) => "darkseagreen1", + Some(Device::AsyncRust) => "peachpuff1", + None => "ivory2", + }; + write!(w, "bgcolor={}\n", color)?; write!(w, "cluster=true\n")?; Ok(()) } diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs index 13e935e0dd151ba3a29c4d07c9f9ee50341d5091..1d706c7834cf30fa3bf5e556d812917942a48d8b 100644 --- a/hercules_ir/src/loops.rs +++ b/hercules_ir/src/loops.rs @@ -152,16 +152,7 @@ pub fn loops( }) .collect(); - // Step 6: compute the inverse loop map - this maps control nodes to which - // loop they are in (keyed by header), if they are in one. - let mut inverse_loops = HashMap::new(); - for (header, (contents, _)) in loops.iter() { - for idx in contents.iter_ones() { - inverse_loops.insert(NodeID::new(idx), *header); - } - } - - // Step 7: compute loop tree nesting. + // Step 6: compute loop tree nesting. let mut nesting = HashMap::new(); let mut worklist: VecDeque<NodeID> = loops.keys().map(|id| *id).collect(); while let Some(header) = worklist.pop_front() { @@ -175,6 +166,24 @@ pub fn loops( } } + // Step 7: compute the inverse loop map - this maps control nodes to which + // loop they are in (identified by header), if they are in one. Pick the + // most nested loop as the loop they are in. 
+ let mut inverse_loops = HashMap::new(); + for (header, (contents, _)) in loops.iter() { + for idx in contents.iter_ones() { + let id = NodeID::new(idx); + if let Some(old_header) = inverse_loops.get(&id) + && nesting[old_header] > nesting[header] + { + // If the inserted header is more deeply nested, don't do anything. + assert!(nesting[old_header] != nesting[header] || old_header == header); + } else { + inverse_loops.insert(id, *header); + } + } + } + LoopTree { root, loops, diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 5ea9485d108ea6454d856bf164d990ea5d7895f8..1323d5a05a784e76d4d3b040f014acd216c710c0 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -5,7 +5,6 @@ use bitvec::prelude::*; use either::Either; use union_find::{QuickFindUf, UnionBySize, UnionFind}; -use hercules_cg::*; use hercules_ir::*; use crate::*; @@ -837,19 +836,16 @@ fn liveness_dataflow( liveness.insert(NodeID::new(bb_idx), vec![BTreeSet::new(); insts.len() + 1]); } let mut num_phis_reduces = vec![0; function.nodes.len()]; - let mut reducing = vec![false; function.nodes.len()]; + let mut has_phi = vec![false; function.nodes.len()]; + let mut has_reduce = vec![false; function.nodes.len()]; for (node_idx, bb) in bbs.0.iter().enumerate() { let node = &function.nodes[node_idx]; if node.is_phi() || node.is_reduce() { num_phis_reduces[bb.idx()] += 1; - // Phis and reduces can't be in the same basic block. 
- if node.is_reduce() { - assert!(num_phis_reduces[bb.idx()] == 0 || reducing[bb.idx()]); - reducing[bb.idx()] = true; - } else { - assert!(!reducing[bb.idx()]); - } } + has_phi[bb.idx()] |= node.is_phi(); /* OR-accumulate: each flag must cover every node in the block, not only the last one visited */ + has_reduce[bb.idx()] |= node.is_reduce(); + assert!(!has_phi[bb.idx()] || !has_reduce[bb.idx()]); } let is_obj = |id: NodeID| !objects[&func_id].objects(id).is_empty(); @@ -861,7 +857,7 @@ fn liveness_dataflow( let last_pt = bbs.1[bb.idx()].len(); let old_value = &liveness[&bb][last_pt]; let mut new_value = BTreeSet::new(); - for succ in control_subgraph.succs(*bb).chain(if reducing[bb.idx()] { + for succ in control_subgraph.succs(*bb).chain(if has_reduce[bb.idx()] { Either::Left(once(*bb)) } else { Either::Right(empty()) diff --git a/hercules_samples/dot/Cargo.toml b/hercules_samples/dot/Cargo.toml index 69cd39e388661b3f7f6dca53cf9210ab7050902c..99a48115197ce853941223ed360079ec5376583e 100644 --- a/hercules_samples/dot/Cargo.toml +++ b/hercules_samples/dot/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" +[features] +cuda = ["hercules_rt/cuda"] + [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs index 2a239bc6c3ebd3780cb15358375c59bdfb2e25ae..4cfd2a87fba14d3c542bb54806a65da2d1a9b8f5 100644 --- a/hercules_samples/dot/build.rs +++ b/hercules_samples/dot/build.rs @@ -4,6 +4,9 @@ fn main() { JunoCompiler::new() .ir_in_src("dot.hir") .unwrap() + //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/dot/src/cpu.sch b/hercules_samples/dot/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..58a7266df5c71232aae41a969dcf286ec3a98385 --- /dev/null +++ b/hercules_samples/dot/src/cpu.sch @@ -0,0 +1,12 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); 
+sroa(*); +unforkify(*); +dce(*); + +gcm(*); diff --git a/hercules_samples/dot/src/gpu.sch b/hercules_samples/dot/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..956eb99628a03a3efb3d77e97d93a8cb677bbd6a --- /dev/null +++ b/hercules_samples/dot/src/gpu.sch @@ -0,0 +1,13 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); +gpu(*); +host(dot); + +ip-sroa(*); +sroa(*); +dce(*); + +gcm(*); diff --git a/hercules_samples/matmul/Cargo.toml b/hercules_samples/matmul/Cargo.toml index 9066c1535e2c40400bdb3b5ca20a3e38237ef597..89e46dd682024012942e6a5014cc5f2f6ec12b83 100644 --- a/hercules_samples/matmul/Cargo.toml +++ b/hercules_samples/matmul/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["Russel Arbore <rarbore2@illinois.edu>"] edition = "2021" +[features] +cuda = ["hercules_rt/cuda"] + [build-dependencies] juno_build = { path = "../../juno_build" } diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs index 08478deaac459d9a94f79fdabce37da9a1205f89..f895af867a019dfd23381a4df2d9a02f80a032f8 100644 --- a/hercules_samples/matmul/build.rs +++ b/hercules_samples/matmul/build.rs @@ -4,6 +4,9 @@ fn main() { JunoCompiler::new() .ir_in_src("matmul.hir") .unwrap() + //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .schedule_in_src("cpu.sch") + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/matmul/src/cpu.sch b/hercules_samples/matmul/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..42dda6e3fc02b23e72ca31ef89a83f020bc9bebc --- /dev/null +++ b/hercules_samples/matmul/src/cpu.sch @@ -0,0 +1,14 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); + +gcm(*); diff --git a/hercules_samples/matmul/src/gpu.sch b/hercules_samples/matmul/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..9067a1908c6615a56f917cb4eb435ace93e9ba3a 
--- /dev/null +++ b/hercules_samples/matmul/src/gpu.sch @@ -0,0 +1,15 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); +gpu(*); +host(matmul); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); + +gcm(*); +xdot[true](*);