From 9210d7486597ff047263693fd0b817c16c57054f Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 24 Jan 2025 10:48:22 -0600
Subject: [PATCH 1/5] Fix assertion in GCM

---
 hercules_opt/src/gcm.rs | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs
index 5ea9485d..042100de 100644
--- a/hercules_opt/src/gcm.rs
+++ b/hercules_opt/src/gcm.rs
@@ -837,19 +837,16 @@ fn liveness_dataflow(
         liveness.insert(NodeID::new(bb_idx), vec![BTreeSet::new(); insts.len() + 1]);
     }
     let mut num_phis_reduces = vec![0; function.nodes.len()];
-    let mut reducing = vec![false; function.nodes.len()];
+    let mut has_phi = vec![false; function.nodes.len()];
+    let mut has_reduce = vec![false; function.nodes.len()];
     for (node_idx, bb) in bbs.0.iter().enumerate() {
         let node = &function.nodes[node_idx];
         if node.is_phi() || node.is_reduce() {
             num_phis_reduces[bb.idx()] += 1;
-            // Phis and reduces can't be in the same basic block.
-            if node.is_reduce() {
-                assert!(num_phis_reduces[bb.idx()] == 0 || reducing[bb.idx()]);
-                reducing[bb.idx()] = true;
-            } else {
-                assert!(!reducing[bb.idx()]);
-            }
         }
+        has_phi[bb.idx()] |= node.is_phi(); // Sticky: flags are per block, not per node.
+        has_reduce[bb.idx()] |= node.is_reduce();
+        assert!(!has_phi[bb.idx()] || !has_reduce[bb.idx()]);
     }
     let is_obj = |id: NodeID| !objects[&func_id].objects(id).is_empty();
 
@@ -861,7 +858,7 @@ fn liveness_dataflow(
             let last_pt = bbs.1[bb.idx()].len();
             let old_value = &liveness[&bb][last_pt];
             let mut new_value = BTreeSet::new();
-            for succ in control_subgraph.succs(*bb).chain(if reducing[bb.idx()] {
+            for succ in control_subgraph.succs(*bb).chain(if has_reduce[bb.idx()] {
                 Either::Left(once(*bb))
             } else {
                 Either::Right(empty())
-- 
GitLab


From 63bfb720a5e485d36d6216b52ae8439362cdde0f Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 24 Jan 2025 11:21:34 -0600
Subject: [PATCH 2/5] Fix bug in loop tree

---
 hercules_ir/src/loops.rs | 29 +++++++++++++++++++----------
 hercules_opt/src/gcm.rs  |  1 -
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs
index 13e935e0..1d706c78 100644
--- a/hercules_ir/src/loops.rs
+++ b/hercules_ir/src/loops.rs
@@ -152,16 +152,7 @@ pub fn loops(
         })
         .collect();
 
-    // Step 6: compute the inverse loop map - this maps control nodes to which
-    // loop they are in (keyed by header), if they are in one.
-    let mut inverse_loops = HashMap::new();
-    for (header, (contents, _)) in loops.iter() {
-        for idx in contents.iter_ones() {
-            inverse_loops.insert(NodeID::new(idx), *header);
-        }
-    }
-
-    // Step 7: compute loop tree nesting.
+    // Step 6: compute loop tree nesting.
     let mut nesting = HashMap::new();
     let mut worklist: VecDeque<NodeID> = loops.keys().map(|id| *id).collect();
     while let Some(header) = worklist.pop_front() {
@@ -175,6 +166,24 @@ pub fn loops(
         }
     }
 
+    // Step 7: compute the inverse loop map - this maps control nodes to which
+    // loop they are in (identified by header), if they are in one. Pick the
+    // most nested loop as the loop they are in.
+    let mut inverse_loops = HashMap::new();
+    for (header, (contents, _)) in loops.iter() {
+        for idx in contents.iter_ones() {
+            let id = NodeID::new(idx);
+            if let Some(old_header) = inverse_loops.get(&id)
+                && nesting[old_header] > nesting[header]
+            {
+                // The header already recorded is more deeply nested, so keep it.
+                assert!(nesting[old_header] != nesting[header] || old_header == header);
+            } else {
+                inverse_loops.insert(id, *header);
+            }
+        }
+    }
+
     LoopTree {
         root,
         loops,
diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs
index 042100de..1323d5a0 100644
--- a/hercules_opt/src/gcm.rs
+++ b/hercules_opt/src/gcm.rs
@@ -5,7 +5,6 @@ use bitvec::prelude::*;
 use either::Either;
 use union_find::{QuickFindUf, UnionBySize, UnionFind};
 
-use hercules_cg::*;
 use hercules_ir::*;
 
 use crate::*;
-- 
GitLab


From 4a8e4118edb1cad44e0872c27726d65727b43edb Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 24 Jan 2025 11:47:23 -0600
Subject: [PATCH 3/5] Add cuda feature and CPU/GPU schedules to dot sample

---
 hercules_samples/dot/Cargo.toml  |  3 +++
 hercules_samples/dot/build.rs    |  2 ++
 hercules_samples/dot/src/cpu.sch | 12 ++++++++++++
 hercules_samples/dot/src/gpu.sch | 13 +++++++++++++
 4 files changed, 30 insertions(+)
 create mode 100644 hercules_samples/dot/src/cpu.sch
 create mode 100644 hercules_samples/dot/src/gpu.sch

diff --git a/hercules_samples/dot/Cargo.toml b/hercules_samples/dot/Cargo.toml
index 69cd39e3..99a48115 100644
--- a/hercules_samples/dot/Cargo.toml
+++ b/hercules_samples/dot/Cargo.toml
@@ -4,6 +4,9 @@ version = "0.1.0"
 authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 edition = "2021"
 
+[features]
+cuda = ["hercules_rt/cuda"]
+
 [build-dependencies]
 juno_build = { path = "../../juno_build" }
 
diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs
index 2a239bc6..8657fdc1 100644
--- a/hercules_samples/dot/build.rs
+++ b/hercules_samples/dot/build.rs
@@ -4,6 +4,8 @@ fn main() {
     JunoCompiler::new()
         .ir_in_src("dot.hir")
         .unwrap()
+        .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" })
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/hercules_samples/dot/src/cpu.sch b/hercules_samples/dot/src/cpu.sch
new file mode 100644
index 00000000..58a7266d
--- /dev/null
+++ b/hercules_samples/dot/src/cpu.sch
@@ -0,0 +1,12 @@
+gvn(*);
+phi-elim(*);
+dce(*);
+
+auto-outline(*);
+
+ip-sroa(*);
+sroa(*);
+unforkify(*);
+dce(*);
+
+gcm(*);
diff --git a/hercules_samples/dot/src/gpu.sch b/hercules_samples/dot/src/gpu.sch
new file mode 100644
index 00000000..956eb996
--- /dev/null
+++ b/hercules_samples/dot/src/gpu.sch
@@ -0,0 +1,13 @@
+gvn(*);
+phi-elim(*);
+dce(*);
+
+auto-outline(*);
+gpu(*);
+host(dot);
+
+ip-sroa(*);
+sroa(*);
+dce(*);
+
+gcm(*);
-- 
GitLab


From 367e853fb29b32a243a718289081d8b58c3550d7 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 24 Jan 2025 12:42:05 -0600
Subject: [PATCH 4/5] comment out gpu schedule for now

---
 hercules_samples/dot/build.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hercules_samples/dot/build.rs b/hercules_samples/dot/build.rs
index 8657fdc1..4cfd2a87 100644
--- a/hercules_samples/dot/build.rs
+++ b/hercules_samples/dot/build.rs
@@ -4,7 +4,8 @@ fn main() {
     JunoCompiler::new()
         .ir_in_src("dot.hir")
         .unwrap()
-        .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" })
+        //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" })
+        .schedule_in_src("cpu.sch")
         .unwrap()
         .build()
         .unwrap();
-- 
GitLab


From 19de48e970e4feaeed996efe7d02e112979f7dc0 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Fri, 24 Jan 2025 12:55:23 -0600
Subject: [PATCH 5/5] add gpu schedule for matmul, color functions in dot

---
 hercules_ir/src/dot.rs              |  8 +++++++-
 hercules_samples/matmul/Cargo.toml  |  3 +++
 hercules_samples/matmul/build.rs    |  3 +++
 hercules_samples/matmul/src/cpu.sch | 14 ++++++++++++++
 hercules_samples/matmul/src/gpu.sch | 15 +++++++++++++++
 5 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 hercules_samples/matmul/src/cpu.sch
 create mode 100644 hercules_samples/matmul/src/gpu.sch

diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs
index 9dd2cb1e..22cd0beb 100644
--- a/hercules_ir/src/dot.rs
+++ b/hercules_ir/src/dot.rs
@@ -219,7 +219,13 @@ fn write_subgraph_header<W: Write>(
     } else {
         write!(w, "label=\"{}\"\n", function.name)?;
     }
-    write!(w, "bgcolor=ivory4\n")?;
+    let color = match function.device {
+        Some(Device::LLVM) => "paleturquoise1",
+        Some(Device::CUDA) => "darkseagreen1",
+        Some(Device::AsyncRust) => "peachpuff1",
+        None => "ivory2",
+    };
+    write!(w, "bgcolor={}\n", color)?;
     write!(w, "cluster=true\n")?;
     Ok(())
 }
diff --git a/hercules_samples/matmul/Cargo.toml b/hercules_samples/matmul/Cargo.toml
index 9066c153..89e46dd6 100644
--- a/hercules_samples/matmul/Cargo.toml
+++ b/hercules_samples/matmul/Cargo.toml
@@ -4,6 +4,9 @@ version = "0.1.0"
 authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 edition = "2021"
 
+[features]
+cuda = ["hercules_rt/cuda"]
+
 [build-dependencies]
 juno_build = { path = "../../juno_build" }
 
diff --git a/hercules_samples/matmul/build.rs b/hercules_samples/matmul/build.rs
index 08478dea..f895af86 100644
--- a/hercules_samples/matmul/build.rs
+++ b/hercules_samples/matmul/build.rs
@@ -4,6 +4,9 @@ fn main() {
     JunoCompiler::new()
         .ir_in_src("matmul.hir")
         .unwrap()
+        //.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" })
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/hercules_samples/matmul/src/cpu.sch b/hercules_samples/matmul/src/cpu.sch
new file mode 100644
index 00000000..42dda6e3
--- /dev/null
+++ b/hercules_samples/matmul/src/cpu.sch
@@ -0,0 +1,14 @@
+gvn(*);
+phi-elim(*);
+dce(*);
+
+auto-outline(*);
+
+ip-sroa(*);
+sroa(*);
+fork-split(*);
+unforkify(*);
+dce(*);
+float-collections(*);
+
+gcm(*);
diff --git a/hercules_samples/matmul/src/gpu.sch b/hercules_samples/matmul/src/gpu.sch
new file mode 100644
index 00000000..9067a190
--- /dev/null
+++ b/hercules_samples/matmul/src/gpu.sch
@@ -0,0 +1,15 @@
+gvn(*);
+phi-elim(*);
+dce(*);
+
+auto-outline(*);
+gpu(*);
+host(matmul);
+
+ip-sroa(*);
+sroa(*);
+dce(*);
+float-collections(*);
+
+gcm(*);
+xdot[true](*);
-- 
GitLab