From 3f8acd3bb4d0b7388a74c7dc24e3d1e4ad9771b1 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 22:44:17 -0600
Subject: [PATCH] misc. progress for bfs gpu

---
 hercules_cg/src/rt.rs                |  2 +-
 hercules_opt/src/gcm.rs              | 70 ++++++++++++++++------------
 juno_samples/rodinia/bfs/src/gpu.sch | 52 ++++++++++++++-------
 3 files changed, 76 insertions(+), 48 deletions(-)

diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs
index 6981a3da..d94f0e19 100644
--- a/hercules_cg/src/rt.rs
+++ b/hercules_cg/src/rt.rs
@@ -938,7 +938,7 @@ impl<'a> RTContext<'a> {
                     let dst_device = self.node_colors.0[&collect];
                     write!(
                         block,
-                        "::hercules_rt::__copy_{}_to_{}({}.byte_add({} as usize).0, {}.0, {});",
+                        "::hercules_rt::__copy_{}_to_{}({}.byte_add({} as usize).0, {}.0, {} as usize);",
                         src_device.name(),
                         dst_device.name(),
                         self.get_value(collect, bb, false),
diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs
index c612acac..3f326051 100644
--- a/hercules_opt/src/gcm.rs
+++ b/hercules_opt/src/gcm.rs
@@ -1304,37 +1304,49 @@ enum UTerm {
 
 fn unify(
     mut equations: VecDeque<(UTerm, UTerm)>,
-) -> Result<BTreeMap<NodeID, Device>, BTreeMap<NodeID, Device>> {
+) -> Result<BTreeMap<NodeID, Device>, (NodeID, NodeID)> {
     let mut theta = BTreeMap::new();
 
+    // First, assign devices to nodes when a rule directly says to.
+    for _ in 0..equations.len() {
+        let (l, r) = equations.pop_front().unwrap();
+        match (l, r) {
+            (UTerm::Node(n), UTerm::Device(d)) | (UTerm::Device(d), UTerm::Node(n)) => {
+                theta.insert(n, d);
+            }
+            _ => equations.push_back((l, r)),
+        }
+    }
+
+    // Second, iterate the rest of the rules until...
+    // 1. The rules are exhausted. All the nodes have device assignments.
+    // 2. No progress is being made. Some nodes may not have device assignments.
+    // 3. An inconsistency has been found. The inconsistency is returned.
     let mut no_progress_iters = 0;
     while no_progress_iters <= equations.len()
         && let Some((l, r)) = equations.pop_front()
     {
-        match (l, r) {
-            (UTerm::Node(_), UTerm::Node(_)) => {
-                if l != r {
-                    equations.push_back((l, r));
-                }
-                no_progress_iters += 1;
-            }
-            (UTerm::Node(n), UTerm::Device(d)) | (UTerm::Device(d), UTerm::Node(n)) => {
-                theta.insert(n, d);
-                for (l, r) in equations.iter_mut() {
-                    if *l == UTerm::Node(n) {
-                        *l = UTerm::Device(d);
-                    }
-                    if *r == UTerm::Node(n) {
-                        *r = UTerm::Device(d);
-                    }
+        let (UTerm::Node(l), UTerm::Node(r)) = (l, r) else {
+            panic!();
+        };
+
+        match (theta.get(&l), theta.get(&r)) {
+            (Some(ld), Some(rd)) => {
+                if ld != rd {
+                    return Err((l, r));
+                } else {
+                    no_progress_iters = 0;
                 }
-                no_progress_iters = 0;
             }
-            (UTerm::Device(d1), UTerm::Device(d2)) if d1 == d2 => {
+            (Some(d), None) | (None, Some(d)) => {
+                let d = *d;
+                theta.insert(l, d);
+                theta.insert(r, d);
                 no_progress_iters = 0;
             }
-            _ => {
-                return Err(theta);
+            (None, None) => {
+                equations.push_back((UTerm::Node(l), UTerm::Node(r)));
+                no_progress_iters += 1;
             }
         }
     }
@@ -1377,8 +1389,8 @@ fn color_nodes(
             } if !editor.get_type(typing[id.idx()]).is_primitive() => {
                 // Every input to a phi needs to be on the same device. The
                 // phi itself is also on this device.
-                for (l, r) in zip(data.into_iter(), data.into_iter().skip(1).chain(once(&id))) {
-                    equations.push((UTerm::Node(*l), UTerm::Node(*r)));
+                for data in data {
+                    equations.push((UTerm::Node(*data), UTerm::Node(id)));
                 }
             }
             Node::Reduce {
@@ -1394,7 +1406,7 @@ fn color_nodes(
             } if !editor.get_type(typing[id.idx()]).is_primitive() => {
                 // Every input to the reduce, and the reduce itself, are on
                 // the same device.
-                equations.push((UTerm::Node(first), UTerm::Node(second)));
+                equations.push((UTerm::Node(first), UTerm::Node(id)));
                 equations.push((UTerm::Node(second), UTerm::Node(id)));
             }
             Node::Constant { id: _ }
@@ -1533,12 +1545,12 @@ fn color_nodes(
             }
             Some(func_colors)
         }
-        Err(progress) => {
+        Err(inconsistency) => {
             // If unification failed, then there's some node using a node in
-            // `progress` that's expecting a different type than what it got.
-            // Pick one and add potentially inter-device copies on each def-use
-            // edge. We'll clean these up later.
-            let (id, _) = progress.into_iter().next().unwrap();
+            // that's expecting a different type than what it got. Pick one and
+            // add potentially inter-device copies on each def-use edge. We'll
+            // clean these up later.
+            let id = inconsistency.0;
             let users: Vec<_> = editor.get_users(id).collect();
             let success = editor.edit(|mut edit| {
                 let cons = edit.add_zero_constant(typing[id.idx()]);
diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 0a3f4d77..49d44b98 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -1,23 +1,39 @@
-gvn(*);
-phi-elim(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let outline = auto-outline(bfs);
-gpu(outline.bfs);
+phi-elim(bfs);
+no-memset(bfs@cost);
+let cost_init = outline(bfs@cost_init);
+let loop1 = outline(bfs@loop1);
+let loop2 = outline(bfs@loop2);
+gpu(cost_init, loop1, loop2);
 
-ip-sroa(*);
-sroa(*);
-dce(*);
-gvn(*);
-phi-elim(*);
-dce(*);
+simpl!(*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
+predication(*);
+simpl!(*);
 
-//forkify(*);
-infer-schedules(*);
+parallel-reduce(loop1);
+forkify(*);
+fork-guard-elim(*);
+simpl!(*);
+predication(*);
+reduce-slf(*);
+simpl!(*);
 
 gcm(*);
-fixpoint {
-  float-collections(*);
-  dce(*);
-  gcm(*);
-}
+xdot[true](*);
-- 
GitLab