From 00154685ccaee4c128231c2f52c931845e2adc71 Mon Sep 17 00:00:00 2001
From: Russel Arbore <rarbore2@illinois.edu>
Date: Wed, 5 Mar 2025 22:08:40 -0600
Subject: [PATCH 1/2] make bfs faster on cpu

---
 hercules_opt/src/fork_transforms.rs  | 119 +++++++++++++++++++++++++++
 juno_samples/rodinia/bfs/src/cpu.sch |   6 +-
 juno_samples/rodinia/bfs/src/gpu.sch |   5 +-
 3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs
index c46e4e98..bebb8c6c 100644
--- a/hercules_opt/src/fork_transforms.rs
+++ b/hercules_opt/src/fork_transforms.rs
@@ -1515,6 +1515,10 @@ fn fork_fusion(
  * Looks for would-be monoid reduces, if not for a gate on the reduction.
  * Partially predicate the gated reduction to allow for a proper monoid
  * reduction.
+ *
+ * Also looks for monoid reduces that occur through a gated write to a single
+ * location, and lifts them to a proper monoid reduction with a single gated
+ * write afterwards.
  */
 pub fn clean_monoid_reduces(editor: &mut FunctionEditor, typing: &Vec<TypeID>) {
     for id in editor.node_ids() {
@@ -1676,6 +1680,121 @@ pub fn clean_monoid_reduces(editor: &mut FunctionEditor, typing: &Vec<TypeID>) {
             });
         }
     }
+
+    for id in editor.node_ids() {
+        // Identify reduce/write/phi cycle through which a sparse AND reduction
+        // is occurring.
+        let nodes = &editor.func().nodes;
+        let Some((join, init, reduct)) = nodes[id.idx()].try_reduce() else {
+            continue;
+        };
+        let join_pred = nodes[join.idx()].try_join().unwrap();
+        let join_succ = editor
+            .get_users(join)
+            .filter(|id| nodes[id.idx()].is_control())
+            .next()
+            .unwrap();
+        let Some((_, phi_data)) = nodes[reduct.idx()].try_phi() else {
+            continue;
+        };
+        if phi_data.len() != 2 {
+            continue;
+        }
+        let phi_other_use = if phi_data[0] == id {
+            phi_data[1]
+        } else if phi_data[1] == id {
+            phi_data[0]
+        } else {
+            continue;
+        };
+        let Some((collect, data, indices)) = nodes[phi_other_use.idx()].try_write() else {
+            continue;
+        };
+        if collect != id {
+            continue;
+        }
+        if indices.into_iter().any(|idx| idx.is_position()) {
+            continue;
+        }
+        if !is_false(editor, data) {
+            continue;
+        }
+        let Some(preds) = nodes[join_pred.idx()].try_region() else {
+            continue;
+        };
+        if preds.len() != 2 {
+            continue;
+        }
+        let Some((if1, _)) = nodes[preds[0].idx()].try_control_proj() else {
+            continue;
+        };
+        let Some((if2, sel)) = nodes[preds[1].idx()].try_control_proj() else {
+            continue;
+        };
+        if if1 != if2 {
+            continue;
+        }
+        let Some((_, mut cond)) = nodes[if1.idx()].try_if() else {
+            continue;
+        };
+
+        // Transform to a monoid reduction and a single gated write.
+        let negated = phi_other_use == phi_data[sel];
+        let indices = indices.to_vec().into_boxed_slice();
+        editor.edit(|mut edit| {
+            let t = edit.add_constant(Constant::Boolean(true));
+            let t = edit.add_node(Node::Constant { id: t });
+            let f = edit.add_constant(Constant::Boolean(false));
+            let f = edit.add_node(Node::Constant { id: f });
+            if negated {
+                cond = edit.add_node(Node::Unary {
+                    op: UnaryOperator::Not,
+                    input: cond,
+                });
+            }
+            let reduce_id = NodeID::new(edit.num_node_ids());
+            let and_id = NodeID::new(edit.num_node_ids() + 1);
+            edit.add_node(Node::Reduce {
+                control: join,
+                init: t,
+                reduct: and_id,
+            });
+            edit.add_node(Node::Binary {
+                op: BinaryOperator::And,
+                left: cond,
+                right: reduce_id,
+            });
+
+            let new_if = edit.add_node(Node::If {
+                control: join,
+                cond: reduce_id,
+            });
+            let cpj1 = edit.add_node(Node::ControlProjection {
+                control: new_if,
+                selection: 0,
+            });
+            let cpj2 = edit.add_node(Node::ControlProjection {
+                control: new_if,
+                selection: 1,
+            });
+            let region = edit.add_node(Node::Region {
+                preds: Box::new([cpj1, cpj2]),
+            });
+            let write = edit.add_node(Node::Write {
+                collect: init,
+                data: f,
+                indices,
+            });
+            let phi = edit.add_node(Node::Phi {
+                control: region,
+                data: Box::new([write, init]),
+            });
+            edit = edit.replace_all_uses_where(id, phi, |other_id| {
+                *other_id != phi_other_use && *other_id != reduct
+            })?;
+            edit.replace_all_uses_where(join, region, |id| *id == join_succ)
+        });
+    }
 }
 
 /*
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index f564cd36..07006edb 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -54,9 +54,13 @@ if !feature("seq") {
   inline(bfs@cost_init, bfs@loop1, bfs@loop2);
   init = init_body;
 }
-fork-tile[8, 0, false, true](init, traverse, collect);
 delete-uncalled(*);
 const-inline(*);
+clean-monoid-reduces(collect);
+simpl!(*);
+
+fork-tile[8, 0, false, true](init, traverse, collect);
+clean-monoid-reduces(collect);
 simpl!(*);
 
 fork-split(init, traverse, collect);
diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 541d15d7..11a1723f 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -31,6 +31,8 @@ predication(*);
 simpl!(*);
 reduce-slf(*);
 simpl!(*);
+slf(*);
+simpl!(*);
 
 fixpoint {
   forkify(collect);
@@ -39,6 +41,7 @@ fixpoint {
 simpl!(collect);
 
 fork-tile[1024, 0, false, true](init, traverse, collect);
-fork-split(init, traverse, collect);
+let out = fork-split(init, traverse, collect);
+simpl!(*);
 
 gcm(*);
-- 
GitLab


From 08e67ade68f310063276b91c5dfbbc5552949b80 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Wed, 5 Mar 2025 22:18:03 -0600
Subject: [PATCH 2/2] remove extra slf/simpl pass from bfs gpu schedule

---
 juno_samples/rodinia/bfs/src/gpu.sch | 2 --
 1 file changed, 2 deletions(-)

diff --git a/juno_samples/rodinia/bfs/src/gpu.sch b/juno_samples/rodinia/bfs/src/gpu.sch
index 11a1723f..ea81f330 100644
--- a/juno_samples/rodinia/bfs/src/gpu.sch
+++ b/juno_samples/rodinia/bfs/src/gpu.sch
@@ -31,8 +31,6 @@ predication(*);
 simpl!(*);
 reduce-slf(*);
 simpl!(*);
-slf(*);
-simpl!(*);
 
 fixpoint {
   forkify(collect);
-- 
GitLab