From 0303d9e8f568562d18606110627a1bba2702c303 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 09:45:12 -0600
Subject: [PATCH 1/9] Need LTE in loop-bound-canon for backprop

---
 juno_samples/rodinia/backprop/src/backprop.jn |  2 +-
 juno_samples/rodinia/backprop/src/cpu.sch     | 37 ++++++++++---------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/juno_samples/rodinia/backprop/src/backprop.jn b/juno_samples/rodinia/backprop/src/backprop.jn
index c7f4345b..356bb3d9 100644
--- a/juno_samples/rodinia/backprop/src/backprop.jn
+++ b/juno_samples/rodinia/backprop/src/backprop.jn
@@ -4,7 +4,7 @@ fn squash(x: f32) -> f32 {
 }
 
 fn layer_forward<n, m: usize>(vals: f32[n + 1], weights: f32[n + 1, m + 1]) -> f32[m + 1] {
-  let result : f32[m + 1];
+  @res let result : f32[m + 1];
   result[0] = 1.0;
 
   for j in 1..=m {
diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch
index 56fc2c9a..b6e69a27 100644
--- a/juno_samples/rodinia/backprop/src/cpu.sch
+++ b/juno_samples/rodinia/backprop/src/cpu.sch
@@ -1,24 +1,25 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(backprop);
-cpu(auto.backprop);
-
-inline(auto.backprop);
-inline(auto.backprop);
+simpl!(*);
+inline(layer_forward);
 delete-uncalled(*);
 
-sroa[true](*);
-dce(*);
-float-collections(*);
-reuse-products(*);
-dce(*);
+no-memset(layer_forward@res);
+lift-dc-math(*);
+loop-bound-canon(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+  fork-coalesce(*);
+}
 
 gcm(*);
 
-- 
GitLab


From 7c66673ddcd5a166d88f00758a0c42ff64b0c25e Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 09:58:20 -0600
Subject: [PATCH 2/9] BFS needs const inlining

---
 juno_samples/rodinia/backprop/src/cpu.sch |  1 -
 juno_samples/rodinia/bfs/build.rs         |  2 ++
 juno_samples/rodinia/bfs/src/bfs.jn       |  8 +++----
 juno_samples/rodinia/bfs/src/cpu.sch      | 26 +++++++++++++++++++++++
 4 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 juno_samples/rodinia/bfs/src/cpu.sch

diff --git a/juno_samples/rodinia/backprop/src/cpu.sch b/juno_samples/rodinia/backprop/src/cpu.sch
index b6e69a27..fa3dccf1 100644
--- a/juno_samples/rodinia/backprop/src/cpu.sch
+++ b/juno_samples/rodinia/backprop/src/cpu.sch
@@ -22,4 +22,3 @@ fixpoint {
 }
 
 gcm(*);
-
diff --git a/juno_samples/rodinia/bfs/build.rs b/juno_samples/rodinia/bfs/build.rs
index c19bae5d..bb8f9ff5 100644
--- a/juno_samples/rodinia/bfs/build.rs
+++ b/juno_samples/rodinia/bfs/build.rs
@@ -13,6 +13,8 @@ fn main() {
     JunoCompiler::new()
         .file_in_src("bfs.jn")
         .unwrap()
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/juno_samples/rodinia/bfs/src/bfs.jn b/juno_samples/rodinia/bfs/src/bfs.jn
index cf2ea086..2e61f1a0 100644
--- a/juno_samples/rodinia/bfs/src/bfs.jn
+++ b/juno_samples/rodinia/bfs/src/bfs.jn
@@ -13,8 +13,8 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
   let visited: bool[n];
   visited[source as u64] = true;
   
-  let cost: i32[n];
-  for i in 0..n {
+  @cost let cost: i32[n];
+  @cost_init for i in 0..n {
     cost[i] = -1;
   }
   cost[source as u64] = 0;
@@ -25,7 +25,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
   while !stop {
     stop = true;
 
-    for i in 0..n {
+    @loop1 for i in 0..n {
       if mask[i] {
         mask[i] = false;
 
@@ -42,7 +42,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
       }
     }
 
-    for i in 0..n {
+    @loop2 for i in 0..n {
       if updated[i] {
         mask[i] = true;
         visited[i] = true;
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
new file mode 100644
index 00000000..1a94ea55
--- /dev/null
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -0,0 +1,26 @@
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
+
+phi-elim(bfs);
+no-memset(bfs@cost);
+outline(bfs@cost_init);
+let loop1 = outline(bfs@loop1);
+let loop2 = outline(bfs@loop2);
+
+simpl!(*);
+predication(*);
+
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+  fork-coalesce(*);
+}
+
+gcm(*);
-- 
GitLab


From b336327e69c03752217bcc613ac3374e880367b1 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 10:52:10 -0600
Subject: [PATCH 3/9] Skeleton and analysis for const inline pass

---
 hercules_opt/src/inline.rs           | 134 ++++++++++++++++++++++++++-
 juno_samples/rodinia/bfs/src/cpu.sch |   1 +
 juno_scheduler/src/compile.rs        |   1 +
 juno_scheduler/src/ir.rs             |   3 +-
 juno_scheduler/src/pm.rs             |  18 ++++
 5 files changed, 153 insertions(+), 4 deletions(-)

diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index 99187dd2..c94ad962 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -1,8 +1,7 @@
+use std::cell::Ref;
 use std::collections::HashMap;
 
-use hercules_ir::callgraph::*;
-use hercules_ir::def_use::*;
-use hercules_ir::ir::*;
+use hercules_ir::*;
 
 use crate::*;
 
@@ -235,3 +234,132 @@ fn inline_func(
         });
     }
 }
+
+#[derive(Clone, Debug, Copy)]
+enum ParameterLattice {
+    Top,
+    Constant(ConstantID),
+    // Dynamic constant
+    DynamicConstant(DynamicConstantID, FunctionID),
+    Bottom,
+}
+
+impl ParameterLattice {
+    fn from_node(node: &Node, func_id: FunctionID) -> Self {
+        use ParameterLattice::*;
+        match node {
+            Node::Undef { ty: _ } => Top,
+            Node::Constant { id } => Constant(*id),
+            Node::DynamicConstant { id } => DynamicConstant(*id, func_id),
+            _ => Bottom,
+        }
+    }
+
+    fn try_const_dc(self, dcs: Ref<'_, Vec<DynamicConstant>>) -> Option<usize> {
+        if let ParameterLattice::DynamicConstant(id, _) = self
+            && let DynamicConstant::Constant(val) = &dcs[id.idx()]
+        {
+            Some(*val)
+        } else {
+            None
+        }
+    }
+
+    fn meet(&mut self, b: Self, cons: Ref<'_, Vec<Constant>>, dcs: Ref<'_, Vec<DynamicConstant>>) {
+        use ParameterLattice::*;
+        *self = match (*self, b) {
+            (Top, b) => b,
+            (a, Top) => a,
+            (Bottom, _) | (_, Bottom) => Bottom,
+            (Constant(id_a), Constant(id_b)) => {
+                if id_a == id_b {
+                    Constant(id_a)
+                } else {
+                    Bottom
+                }
+            }
+            (DynamicConstant(dc_a, f_a), DynamicConstant(dc_b, f_b)) => {
+                if dc_a == dc_b && f_a == f_b {
+                    DynamicConstant(dc_a, f_a)
+                } else if let (
+                    ir::DynamicConstant::Constant(dcv_a),
+                    ir::DynamicConstant::Constant(dcv_b),
+                ) = (&dcs[dc_a.idx()], &dcs[dc_b.idx()])
+                    && *dcv_a == *dcv_b
+                {
+                    DynamicConstant(dc_a, f_a)
+                } else {
+                    Bottom
+                }
+            }
+            (DynamicConstant(dc, _), Constant(con)) | (Constant(con), DynamicConstant(dc, _)) => {
+                match (&cons[con.idx()], &dcs[dc.idx()]) {
+                    (ir::Constant::UnsignedInteger64(conv), ir::DynamicConstant::Constant(dcv))
+                        if *conv as usize == *dcv =>
+                    {
+                        Constant(con)
+                    }
+                    _ => Bottom,
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Top level function to inline constant parameters and constant dynamic
+ * constant parameters. Identifies functions that are:
+ *
+ * 1. Not marked as entry.
+ * 2. At every call site, a particular parameter is always a specific constant
+ *    or dynamic constant OR a particular dynamic constant parameter is always a
+ *    specific constant.
+ *
+ * These functions can have that constant "inlined" - the parameter is removed
+ * and all uses of the parameter becomes uses of the constant directly.
+ */
+pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
+    // Run const inlining on each function, starting at the most shallow
+    // function first, since we want to propagate constants down the call graph.
+    for func_id in callgraph.topo().into_iter().rev() {
+        let func = editors[func_id.idx()].func();
+        if func.entry || callgraph.num_callers(func_id) == 0 {
+            continue;
+        }
+
+        // Figure out what we know about the parameters (both normal and dynamic
+        // constant) to this function.
+        let mut param_lattice = vec![ParameterLattice::Top; func.param_types.len()];
+        let mut dc_param_lattice = vec![ParameterLattice::Top; func.num_dynamic_constants as usize];
+        for caller in callgraph.get_callers(func_id) {
+            let editor = &editors[caller.idx()];
+            let nodes = &editor.func().nodes;
+            for id in editor.node_ids() {
+                if let Some((_, callee, dc_args, args)) = nodes[id.idx()].try_call()
+                    && callee == func_id
+                {
+                    for (idx, id) in args.into_iter().enumerate() {
+                        let lattice = ParameterLattice::from_node(&nodes[id.idx()], callee);
+                        param_lattice[idx].meet(
+                            lattice,
+                            editor.get_constants(),
+                            editor.get_dynamic_constants(),
+                        );
+                    }
+
+                    for (idx, id) in dc_args.into_iter().enumerate() {
+                        let lattice = ParameterLattice::DynamicConstant(*id, func_id);
+                        dc_param_lattice[idx].meet(
+                            lattice,
+                            editor.get_constants(),
+                            editor.get_dynamic_constants(),
+                        );
+                    }
+                }
+            }
+        }
+        println!("{}:", func.name);
+        println!("{:?}", param_lattice);
+        println!("{:?}", dc_param_lattice);
+    }
+}
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index 1a94ea55..d201765e 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -22,5 +22,6 @@ fixpoint {
   fork-guard-elim(*);
   fork-coalesce(*);
 }
+const-inline(*);
 
 gcm(*);
diff --git a/juno_scheduler/src/compile.rs b/juno_scheduler/src/compile.rs
index 3c288ca7..bd27350a 100644
--- a/juno_scheduler/src/compile.rs
+++ b/juno_scheduler/src/compile.rs
@@ -112,6 +112,7 @@ impl FromStr for Appliable {
             "ccp" => Ok(Appliable::Pass(ir::Pass::CCP)),
             "crc" | "collapse-read-chains" => Ok(Appliable::Pass(ir::Pass::CRC)),
             "clean-monoid-reduces" => Ok(Appliable::Pass(ir::Pass::CleanMonoidReduces)),
+            "const-inline" => Ok(Appliable::Pass(ir::Pass::ConstInline)),
             "dce" => Ok(Appliable::Pass(ir::Pass::DCE)),
             "delete-uncalled" => Ok(Appliable::DeleteUncalled),
             "float-collections" | "collections" => Ok(Appliable::Pass(ir::Pass::FloatCollections)),
diff --git a/juno_scheduler/src/ir.rs b/juno_scheduler/src/ir.rs
index 3a087c0d..a0db8844 100644
--- a/juno_scheduler/src/ir.rs
+++ b/juno_scheduler/src/ir.rs
@@ -8,8 +8,9 @@ pub enum Pass {
     ArrayToProduct,
     AutoOutline,
     CCP,
-    CleanMonoidReduces,
     CRC,
+    CleanMonoidReduces,
+    ConstInline,
     DCE,
     FloatCollections,
     ForkChunk,
diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs
index 5f2fa4cc..3c142700 100644
--- a/juno_scheduler/src/pm.rs
+++ b/juno_scheduler/src/pm.rs
@@ -1833,6 +1833,24 @@ fn run_pass(
             pm.delete_gravestones();
             pm.clear_analyses();
         }
+        Pass::ConstInline => {
+            assert!(args.is_empty());
+            pm.make_callgraph();
+            let callgraph = pm.callgraph.take().unwrap();
+
+            let mut editors: Vec<_> = build_selection(pm, selection, true)
+                .into_iter()
+                .map(|editor| editor.unwrap())
+                .collect();
+            const_inline(&mut editors, &callgraph);
+
+            for func in editors {
+                changed |= func.modified();
+            }
+
+            pm.delete_gravestones();
+            pm.clear_analyses();
+        }
         Pass::CRC => {
             assert!(args.is_empty());
             for func in build_selection(pm, selection, false) {
-- 
GitLab


From 4572a7ec7ba13913b86127bdd3fa2edfa73f26ac Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 12:58:53 -0600
Subject: [PATCH 4/9] Inline constants

---
 hercules_opt/src/editor.rs           |  12 +++
 hercules_opt/src/inline.rs           | 126 +++++++++++++++++++++++----
 juno_samples/rodinia/bfs/src/cpu.sch |   1 +
 3 files changed, 121 insertions(+), 18 deletions(-)

diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs
index 17cea325..0e332a00 100644
--- a/hercules_opt/src/editor.rs
+++ b/hercules_opt/src/editor.rs
@@ -880,6 +880,18 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
         }
     }
 
+    pub fn get_param_types(&self) -> &Vec<TypeID> {
+        self.updated_param_types
+            .as_ref()
+            .unwrap_or(&self.editor.function.param_types)
+    }
+
+    pub fn get_return_types(&self) -> &Vec<TypeID> {
+        self.updated_return_types
+            .as_ref()
+            .unwrap_or(&self.editor.function.return_types)
+    }
+
     pub fn set_param_types(&mut self, tys: Vec<TypeID>) {
         self.updated_param_types = Some(tys);
     }
diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index c94ad962..895f1f73 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -235,7 +235,7 @@ fn inline_func(
     }
 }
 
-#[derive(Clone, Debug, Copy)]
+#[derive(Clone, Debug, Copy, PartialEq, Eq)]
 enum ParameterLattice {
     Top,
     Constant(ConstantID),
@@ -331,6 +331,7 @@ pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
         // constant) to this function.
         let mut param_lattice = vec![ParameterLattice::Top; func.param_types.len()];
         let mut dc_param_lattice = vec![ParameterLattice::Top; func.num_dynamic_constants as usize];
+        let mut callers = vec![];
         for caller in callgraph.get_callers(func_id) {
             let editor = &editors[caller.idx()];
             let nodes = &editor.func().nodes;
@@ -338,28 +339,117 @@ pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
                 if let Some((_, callee, dc_args, args)) = nodes[id.idx()].try_call()
                     && callee == func_id
                 {
-                    for (idx, id) in args.into_iter().enumerate() {
-                        let lattice = ParameterLattice::from_node(&nodes[id.idx()], callee);
-                        param_lattice[idx].meet(
-                            lattice,
-                            editor.get_constants(),
-                            editor.get_dynamic_constants(),
-                        );
+                    if editor.is_mutable(id) {
+                        for (idx, id) in args.into_iter().enumerate() {
+                            let lattice = ParameterLattice::from_node(&nodes[id.idx()], callee);
+                            param_lattice[idx].meet(
+                                lattice,
+                                editor.get_constants(),
+                                editor.get_dynamic_constants(),
+                            );
+                        }
+
+                        for (idx, id) in dc_args.into_iter().enumerate() {
+                            let lattice = ParameterLattice::DynamicConstant(*id, func_id);
+                            dc_param_lattice[idx].meet(
+                                lattice,
+                                editor.get_constants(),
+                                editor.get_dynamic_constants(),
+                            );
+                        }
+                    } else {
+                        // If we can't modify the call node in the caller, then
+                        // we can't perform the inlining.
+                        param_lattice = vec![ParameterLattice::Bottom; func.param_types.len()];
+                        dc_param_lattice =
+                            vec![ParameterLattice::Bottom; func.num_dynamic_constants as usize];
                     }
+                    callers.push((caller, id));
+                }
+            }
+        }
+        if param_lattice.iter().all(|v| *v == ParameterLattice::Bottom)
+            && dc_param_lattice
+                .iter()
+                .all(|v| *v == ParameterLattice::Bottom)
+        {
+            continue;
+        }
 
-                    for (idx, id) in dc_args.into_iter().enumerate() {
-                        let lattice = ParameterLattice::DynamicConstant(*id, func_id);
-                        dc_param_lattice[idx].meet(
-                            lattice,
-                            editor.get_constants(),
-                            editor.get_dynamic_constants(),
-                        );
+        // Replace the arguments.
+        let editor = &mut editors[func_id.idx()];
+        let mut param_idx_to_ids: HashMap<usize, Vec<NodeID>> = HashMap::new();
+        for id in editor.node_ids() {
+            if let Some(idx) = editor.func().nodes[id.idx()].try_parameter() {
+                param_idx_to_ids.entry(idx).or_default().push(id);
+            }
+        }
+        let mut params_to_remove = vec![];
+        let success = editor.edit(|mut edit| {
+            let mut param_tys = edit.get_param_types().clone();
+            let mut decrement_index_by = 0;
+            for idx in 0..param_tys.len() {
+                if let Some(node) = match param_lattice[idx] {
+                    ParameterLattice::Top => Some(Node::Undef { ty: param_tys[idx] }),
+                    ParameterLattice::Constant(id) => Some(Node::Constant { id }),
+                    _ => None,
+                } && let Some(ids) = param_idx_to_ids.get(&idx)
+                {
+                    let node = edit.add_node(node);
+                    for id in ids {
+                        edit = edit.replace_all_uses(*id, node)?;
+                        edit = edit.delete_node(*id)?;
+                    }
+                    param_tys.remove(idx - decrement_index_by);
+                    params_to_remove.push(idx);
+                    decrement_index_by += 1;
+                } else if decrement_index_by != 0
+                    && let Some(ids) = param_idx_to_ids.get(&idx)
+                {
+                    let node = edit.add_node(Node::Parameter {
+                        index: idx - decrement_index_by,
+                    });
+                    for id in ids {
+                        edit = edit.replace_all_uses(*id, node)?;
+                        edit = edit.delete_node(*id)?;
                     }
                 }
             }
+            edit.set_param_types(param_tys);
+            Ok(edit)
+        });
+        params_to_remove.reverse();
+
+        // Update callers.
+        if success {
+            for (caller, call) in callers {
+                let editor = &mut editors[caller.idx()];
+                let success = editor.edit(|mut edit| {
+                    let Node::Call {
+                        control,
+                        function,
+                        dynamic_constants,
+                        args,
+                    } = edit.get_node(call).clone()
+                    else {
+                        panic!();
+                    };
+                    let mut args = args.into_vec();
+                    for idx in params_to_remove.iter() {
+                        args.remove(*idx);
+                    }
+                    let node = edit.add_node(Node::Call {
+                        control,
+                        function,
+                        dynamic_constants,
+                        args: args.into_boxed_slice(),
+                    });
+                    edit = edit.replace_all_uses(call, node)?;
+                    edit = edit.delete_node(call)?;
+                    Ok(edit)
+                });
+                assert!(success);
+            }
         }
-        println!("{}:", func.name);
-        println!("{:?}", param_lattice);
-        println!("{:?}", dc_param_lattice);
     }
 }
diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index d201765e..04ffeaa6 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -23,5 +23,6 @@ fixpoint {
   fork-coalesce(*);
 }
 const-inline(*);
+simpl!(*);
 
 gcm(*);
-- 
GitLab


From 84dfd900348e6bf47b1b6e9dad5ff27776f50541 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 13:08:55 -0600
Subject: [PATCH 5/9] Inline dynamic constants

---
 hercules_opt/src/inline.rs          | 47 +++++++++--------------------
 juno_samples/rodinia/bfs/src/bfs.jn |  2 +-
 2 files changed, 16 insertions(+), 33 deletions(-)

diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index 895f1f73..38ed1b22 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -255,16 +255,6 @@ impl ParameterLattice {
         }
     }
 
-    fn try_const_dc(self, dcs: Ref<'_, Vec<DynamicConstant>>) -> Option<usize> {
-        if let ParameterLattice::DynamicConstant(id, _) = self
-            && let DynamicConstant::Constant(val) = &dcs[id.idx()]
-        {
-            Some(*val)
-        } else {
-            None
-        }
-    }
-
     fn meet(&mut self, b: Self, cons: Ref<'_, Vec<Constant>>, dcs: Ref<'_, Vec<DynamicConstant>>) {
         use ParameterLattice::*;
         *self = match (*self, b) {
@@ -312,8 +302,7 @@ impl ParameterLattice {
  *
  * 1. Not marked as entry.
  * 2. At every call site, a particular parameter is always a specific constant
- *    or dynamic constant OR a particular dynamic constant parameter is always a
- *    specific constant.
+ *    or dynamic constant.
  *
  * These functions can have that constant "inlined" - the parameter is removed
  * and all uses of the parameter becomes uses of the constant directly.
@@ -327,16 +316,14 @@ pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
             continue;
         }
 
-        // Figure out what we know about the parameters (both normal and dynamic
-        // constant) to this function.
+        // Figure out what we know about the parameters to this function.
         let mut param_lattice = vec![ParameterLattice::Top; func.param_types.len()];
-        let mut dc_param_lattice = vec![ParameterLattice::Top; func.num_dynamic_constants as usize];
         let mut callers = vec![];
         for caller in callgraph.get_callers(func_id) {
             let editor = &editors[caller.idx()];
             let nodes = &editor.func().nodes;
             for id in editor.node_ids() {
-                if let Some((_, callee, dc_args, args)) = nodes[id.idx()].try_call()
+                if let Some((_, callee, _, args)) = nodes[id.idx()].try_call()
                     && callee == func_id
                 {
                     if editor.is_mutable(id) {
@@ -348,31 +335,16 @@ pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
                                 editor.get_dynamic_constants(),
                             );
                         }
-
-                        for (idx, id) in dc_args.into_iter().enumerate() {
-                            let lattice = ParameterLattice::DynamicConstant(*id, func_id);
-                            dc_param_lattice[idx].meet(
-                                lattice,
-                                editor.get_constants(),
-                                editor.get_dynamic_constants(),
-                            );
-                        }
                     } else {
                         // If we can't modify the call node in the caller, then
                         // we can't perform the inlining.
                         param_lattice = vec![ParameterLattice::Bottom; func.param_types.len()];
-                        dc_param_lattice =
-                            vec![ParameterLattice::Bottom; func.num_dynamic_constants as usize];
                     }
                     callers.push((caller, id));
                 }
             }
         }
-        if param_lattice.iter().all(|v| *v == ParameterLattice::Bottom)
-            && dc_param_lattice
-                .iter()
-                .all(|v| *v == ParameterLattice::Bottom)
-        {
+        if param_lattice.iter().all(|v| *v == ParameterLattice::Bottom) {
             continue;
         }
 
@@ -392,6 +364,17 @@ pub fn const_inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
                 if let Some(node) = match param_lattice[idx] {
                     ParameterLattice::Top => Some(Node::Undef { ty: param_tys[idx] }),
                     ParameterLattice::Constant(id) => Some(Node::Constant { id }),
+                    ParameterLattice::DynamicConstant(id, _) => {
+                        // Rust moment.
+                        let maybe_cons = edit.get_dynamic_constant(id).try_constant();
+                        if let Some(val) = maybe_cons {
+                            Some(Node::DynamicConstant {
+                                id: edit.add_dynamic_constant(DynamicConstant::Constant(val)),
+                            })
+                        } else {
+                            None
+                        }
+                    }
                     _ => None,
                 } && let Some(ids) = param_idx_to_ids.get(&idx)
                 {
diff --git a/juno_samples/rodinia/bfs/src/bfs.jn b/juno_samples/rodinia/bfs/src/bfs.jn
index 2e61f1a0..51dcd945 100644
--- a/juno_samples/rodinia/bfs/src/bfs.jn
+++ b/juno_samples/rodinia/bfs/src/bfs.jn
@@ -13,7 +13,7 @@ fn bfs<n, m: usize>(graph_nodes: Node[n], source: u32, edges: u32[m]) -> i32[n]
   let visited: bool[n];
   visited[source as u64] = true;
   
-  @cost let cost: i32[n];
+  @cost @cost_init let cost: i32[n];
   @cost_init for i in 0..n {
     cost[i] = -1;
   }
-- 
GitLab


From d4268c90491efae0db1eeaa99755be9336a7bdbe Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 13:12:40 -0600
Subject: [PATCH 6/9] forkify bfs

---
 juno_samples/rodinia/bfs/src/cpu.sch | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/juno_samples/rodinia/bfs/src/cpu.sch b/juno_samples/rodinia/bfs/src/cpu.sch
index 04ffeaa6..44cfa8ad 100644
--- a/juno_samples/rodinia/bfs/src/cpu.sch
+++ b/juno_samples/rodinia/bfs/src/cpu.sch
@@ -16,13 +16,14 @@ let loop2 = outline(bfs@loop2);
 
 simpl!(*);
 predication(*);
-
+const-inline(*);
+simpl!(*);
 fixpoint {
   forkify(*);
   fork-guard-elim(*);
-  fork-coalesce(*);
 }
-const-inline(*);
 simpl!(*);
 
+unforkify(*);
+
 gcm(*);
-- 
GitLab


From 99b1d2945e2d63ff6dbe3e68f5bd3a2b2c2fe2cb Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 13:56:11 -0600
Subject: [PATCH 7/9] cfd cpu schedules, add module name to name of generated
 functions to avoid collisions

---
 hercules_cg/src/cpu.rs                        | 19 +++++---
 hercules_cg/src/gpu.rs                        |  9 ++--
 hercules_cg/src/rt.rs                         | 18 +++++---
 juno_samples/rodinia/cfd/src/cpu_euler.sch    | 43 +++++++++++--------
 .../rodinia/cfd/src/cpu_pre_euler.sch         | 43 +++++++++++--------
 juno_scheduler/src/pm.rs                      |  3 ++
 6 files changed, 85 insertions(+), 50 deletions(-)

diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
index b15cf301..37bf814d 100644
--- a/hercules_cg/src/cpu.rs
+++ b/hercules_cg/src/cpu.rs
@@ -16,6 +16,7 @@ static NUM_FILLER_REGS: AtomicUsize = AtomicUsize::new(0);
  * LLVM bindings for Rust, and we are *not* writing any C++.
  */
 pub fn cpu_codegen<W: Write>(
+    module_name: &str,
     function: &Function,
     types: &Vec<Type>,
     constants: &Vec<Constant>,
@@ -27,6 +28,7 @@ pub fn cpu_codegen<W: Write>(
     w: &mut W,
 ) -> Result<(), Error> {
     let ctx = CPUContext {
+        module_name,
         function,
         types,
         constants,
@@ -40,6 +42,7 @@ pub fn cpu_codegen<W: Write>(
 }
 
 struct CPUContext<'a> {
+    module_name: &'a str,
     function: &'a Function,
     types: &'a Vec<Type>,
     constants: &'a Vec<Constant>,
@@ -65,16 +68,18 @@ impl<'a> CPUContext<'a> {
             if self.types[return_type.idx()].is_primitive() {
                 write!(
                     w,
-                    "define dso_local {} @{}(",
+                    "define dso_local {} @{}_{}(",
                     self.get_type(return_type),
-                    self.function.name
+                    self.module_name,
+                    self.function.name,
                 )?;
             } else {
                 write!(
                     w,
-                    "define dso_local nonnull noundef {} @{}(",
+                    "define dso_local nonnull noundef {} @{}_{}(",
                     self.get_type(return_type),
-                    self.function.name
+                    self.module_name,
+                    self.function.name,
                 )?;
             }
         } else {
@@ -89,7 +94,11 @@ impl<'a> CPUContext<'a> {
                     .collect::<Vec<_>>()
                     .join(", "),
             )?;
-            write!(w, "define dso_local void @{}(", self.function.name,)?;
+            write!(
+                w,
+                "define dso_local void @{}_{}(",
+                self.module_name, self.function.name,
+            )?;
         }
         let mut first_param = true;
         // The first parameter is a pointer to CPU backing memory, if it's
diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index a3eea274..1ba8302a 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -14,6 +14,7 @@ use crate::*;
  * of similarities with the CPU LLVM generation plus custom GPU parallelization.
  */
 pub fn gpu_codegen<W: Write>(
+    module_name: &str,
     function: &Function,
     types: &Vec<Type>,
     constants: &Vec<Constant>,
@@ -170,6 +171,7 @@ pub fn gpu_codegen<W: Write>(
     };
 
     let ctx = GPUContext {
+        module_name,
         function,
         types,
         constants,
@@ -199,6 +201,7 @@ struct GPUKernelParams {
 }
 
 struct GPUContext<'a> {
+    module_name: &'a str,
     function: &'a Function,
     types: &'a Vec<Type>,
     constants: &'a Vec<Constant>,
@@ -395,8 +398,8 @@ namespace cg = cooperative_groups;
     fn codegen_kernel_begin<W: Write>(&self, w: &mut W) -> Result<(), Error> {
         write!(
             w,
-            "__global__ void __launch_bounds__({}) {}_gpu(",
-            self.kernel_params.max_num_threads, self.function.name
+            "__global__ void __launch_bounds__({}) {}_{}_gpu(",
+            self.kernel_params.max_num_threads, self.module_name, self.function.name
         )?;
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
@@ -645,7 +648,7 @@ namespace cg = cooperative_groups;
         } else {
             write!(w, "{}", self.get_type(self.function.return_types[0], false))?;
         }
-        write!(w, " {}(", self.function.name)?;
+        write!(w, " {}_{}(", self.module_name, self.function.name)?;
 
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs
index 8fa0c09e..6981a3da 100644
--- a/hercules_cg/src/rt.rs
+++ b/hercules_cg/src/rt.rs
@@ -74,6 +74,7 @@ use crate::*;
  * set some CUDA memory - the user can then take a CUDA reference to that box.
  */
 pub fn rt_codegen<W: Write>(
+    module_name: &str,
     func_id: FunctionID,
     module: &Module,
     def_use: &ImmutableDefUseMap,
@@ -96,6 +97,7 @@ pub fn rt_codegen<W: Write>(
         .map(|(fork, join)| (*join, *fork))
         .collect();
     let ctx = RTContext {
+        module_name,
         func_id,
         module,
         def_use,
@@ -117,6 +119,7 @@ pub fn rt_codegen<W: Write>(
 }
 
 struct RTContext<'a> {
+    module_name: &'a str,
     func_id: FunctionID,
     module: &'a Module,
     def_use: &'a ImmutableDefUseMap,
@@ -157,7 +160,8 @@ impl<'a> RTContext<'a> {
         // Dump the function signature.
         write!(
             w,
-            "#[allow(unused_assignments,unused_variables,unused_mut,unused_parens,unused_unsafe,non_snake_case)]async unsafe fn {}(",
+            "#[allow(unused_assignments,unused_variables,unused_mut,unused_parens,unused_unsafe,non_snake_case)]async unsafe fn {}_{}(",
+            self.module_name,
             func.name
         )?;
         let mut first_param = true;
@@ -236,7 +240,7 @@ impl<'a> RTContext<'a> {
                 // Create the return struct
                 write!(w, "let mut ret_struct: ::std::mem::MaybeUninit<ReturnStruct> = ::std::mem::MaybeUninit::uninit();")?;
                 // Call the device function
-                write!(w, "{}(", callee.name)?;
+                write!(w, "{}_{}(", self.module_name, callee.name)?;
                 if self.backing_allocations[&callee_id].contains_key(&self.devices[callee_id.idx()])
                 {
                     write!(w, "backing, ")?;
@@ -672,8 +676,9 @@ impl<'a> RTContext<'a> {
                 };
                 write!(
                     block,
-                    "{}{}(",
+                    "{}{}_{}(",
                     prefix,
+                    self.module_name,
                     self.module.functions[callee_id.idx()].name
                 )?;
                 for (device, (offset, size)) in self.backing_allocations[&self.func_id]
@@ -1463,7 +1468,7 @@ impl<'a> RTContext<'a> {
         }
 
         // Call the wrapped function.
-        write!(w, "let ret = {}(", func.name)?;
+        write!(w, "let ret = {}_{}(", self.module_name, func.name)?;
         for (device, _) in self.backing_allocations[&self.func_id].iter() {
             write!(
                 w,
@@ -1630,8 +1635,9 @@ impl<'a> RTContext<'a> {
         let func = &self.module.functions[func_id.idx()];
         write!(
             w,
-            "{}fn {}(",
+            "{}fn {}_{}(",
             if is_unsafe { "unsafe " } else { "" },
+            self.module_name,
             func.name
         )?;
         let mut first_param = true;
@@ -1667,7 +1673,7 @@ impl<'a> RTContext<'a> {
         func_id: FunctionID,
     ) -> Result<(), Error> {
         let func = &self.module.functions[func_id.idx()];
-        write!(w, "fn {}(", func.name)?;
+        write!(w, "fn {}_{}(", self.module_name, func.name)?;
         let mut first_param = true;
         if self.backing_allocations[&func_id].contains_key(&self.devices[func_id.idx()]) {
             first_param = false;
diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch
index 9cbdb942..5fe48a83 100644
--- a/juno_samples/rodinia/cfd/src/cpu_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch
@@ -1,23 +1,30 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(euler);
-cpu(auto.euler);
-
-inline(auto.euler);
-inline(auto.euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step);
 delete-uncalled(*);
+simpl!(*);
+ip-sroa[false](*);
+sroa[false](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
 
-sroa[false](auto.euler);
-dce(*);
-float-collections(*);
-dce(*);
+unforkify(*);
 
 gcm(*);
-
diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
index 252015c3..6329c504 100644
--- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
+++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch
@@ -1,23 +1,30 @@
-gvn(*);
-dce(*);
-phi-elim(*);
-dce(*);
-crc(*);
-dce(*);
-slf(*);
-dce(*);
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  crc(X);
+  slf(X);
+  dce(X);
+  infer-schedules(X);
+}
 
-let auto = auto-outline(pre_euler);
-cpu(auto.pre_euler);
-
-inline(auto.pre_euler);
-inline(auto.pre_euler);
+simpl!(*);
+inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step);
 delete-uncalled(*);
+simpl!(*);
+ip-sroa[false](*);
+sroa[false](*);
+predication(*);
+const-inline(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+}
+simpl!(*);
 
-sroa[false](auto.pre_euler);
-dce(*);
-float-collections(*);
-dce(*);
+unforkify(*);
 
 gcm(*);
-
diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs
index 3c142700..e049f985 100644
--- a/juno_scheduler/src/pm.rs
+++ b/juno_scheduler/src/pm.rs
@@ -951,6 +951,7 @@ impl PassManager {
         for idx in 0..module.functions.len() {
             match devices[idx] {
                 Device::LLVM => cpu_codegen(
+                    &module_name,
                     &module.functions[idx],
                     &module.types,
                     &module.constants,
@@ -966,6 +967,7 @@ impl PassManager {
                     error: format!("{}", e),
                 })?,
                 Device::CUDA => gpu_codegen(
+                    &module_name,
                     &module.functions[idx],
                     &module.types,
                     &module.constants,
@@ -986,6 +988,7 @@ impl PassManager {
                     error: format!("{}", e),
                 })?,
                 Device::AsyncRust => rt_codegen(
+                    &module_name,
                     FunctionID::new(idx),
                     &module,
                     &def_uses[idx],
-- 
GitLab


From dfff5eecca3f6a133fb4c82f29d221a4f406ed13 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 14:20:22 -0600
Subject: [PATCH 8/9] srad cpu schedule

---
 juno_samples/rodinia/srad/build.rs    |  2 ++
 juno_samples/rodinia/srad/src/cpu.sch | 35 +++++++++++++++++++++++++++
 juno_samples/rodinia/srad/src/srad.jn | 13 +++++-----
 3 files changed, 44 insertions(+), 6 deletions(-)
 create mode 100644 juno_samples/rodinia/srad/src/cpu.sch

diff --git a/juno_samples/rodinia/srad/build.rs b/juno_samples/rodinia/srad/build.rs
index 36ba6120..5e1f78f7 100644
--- a/juno_samples/rodinia/srad/build.rs
+++ b/juno_samples/rodinia/srad/build.rs
@@ -13,6 +13,8 @@ fn main() {
     JunoCompiler::new()
         .file_in_src("srad.jn")
         .unwrap()
+        .schedule_in_src("cpu.sch")
+        .unwrap()
         .build()
         .unwrap();
 }
diff --git a/juno_samples/rodinia/srad/src/cpu.sch b/juno_samples/rodinia/srad/src/cpu.sch
new file mode 100644
index 00000000..1a81ddad
--- /dev/null
+++ b/juno_samples/rodinia/srad/src/cpu.sch
@@ -0,0 +1,35 @@
+macro simpl!(X) {
+  ccp(X);
+  simplify-cfg(X);
+  lift-dc-math(X);
+  gvn(X);
+  phi-elim(X);
+  dce(X);
+  infer-schedules(X);
+}
+
+phi-elim(*);
+let loop1 = outline(srad@loop1);
+let loop2 = outline(srad@loop2);
+let loop3 = outline(srad@loop3);
+simpl!(*);
+const-inline(*);
+crc(*);
+slf(*);
+write-predication(*);
+simpl!(*);
+predication(*);
+simpl!(*);
+predication(*);
+simpl!(*);
+fixpoint {
+  forkify(*);
+  fork-guard-elim(*);
+  fork-coalesce(*);
+}
+simpl!(*);
+
+fork-split(*);
+unforkify(*);
+
+gcm(*);
diff --git a/juno_samples/rodinia/srad/src/srad.jn b/juno_samples/rodinia/srad/src/srad.jn
index 5eea647c..3e016a99 100644
--- a/juno_samples/rodinia/srad/src/srad.jn
+++ b/juno_samples/rodinia/srad/src/srad.jn
@@ -38,7 +38,7 @@ fn srad<nrows, ncols: usize>(
 
     // These loops should really be interchanged, but they aren't in the
     // Rodinia source (though they are in the HPVM source)
-    for i in 0..nrows {
+    @loop1 for i in 0..nrows {
       for j in 0..ncols {
         let tmp = image[j, i];
         sum += tmp;
@@ -57,7 +57,7 @@ fn srad<nrows, ncols: usize>(
 
     let c : f32[ncols, nrows];
 
-    for j in 0..ncols {
+    @loop2 for j in 0..ncols {
       for i in 0..nrows {
         let Jc = image[j, i];
         dN[j, i] = image[j, iN[i] as u64] - Jc;
@@ -75,14 +75,15 @@ fn srad<nrows, ncols: usize>(
         let qsqr = num / (den * den);
 
         let den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr));
-        c[j, i] = 1.0 / (1.0 + den);
+	let val = 1.0 / (1.0 + den);
 
-        if c[j, i] < 0      { c[j, i] = 0; }
-        else if c[j, i] > 1 { c[j, i] = 1; }
+        if val < 0      { c[j, i] = 0; }
+        else if val > 1 { c[j, i] = 1; }
+	else            { c[j, i] = val; }
       }
     }
 
-    for j in 0..ncols {
+    @loop3 for j in 0..ncols {
       for i in 0..nrows {
         let cN = c[j, i];
         let cS = c[j, iS[i] as u64];
-- 
GitLab


From 2acc3edec7b8f001eb56cae90935e47716ff2800 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Thu, 27 Feb 2025 14:23:38 -0600
Subject: [PATCH 9/9] fix

---
 hercules_cg/src/gpu.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 1ba8302a..c9720273 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -724,8 +724,13 @@ namespace cg = cooperative_groups;
         write!(w, "\tcudaError_t err;\n")?;
         write!(
             w,
-            "\t{}_gpu<<<{}, {}, {}>>>({});\n",
-            self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args
+            "\t{}_{}_gpu<<<{}, {}, {}>>>({});\n",
+            self.module_name,
+            self.function.name,
+            num_blocks,
+            num_threads,
+            dynamic_shared_offset,
+            pass_args
         )?;
         write!(w, "\terr = cudaGetLastError();\n")?;
         write!(
-- 
GitLab