From 8ef4d9c72915d5ac21aab064899494b7b258a7de Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 10 Nov 2024 19:03:23 -0600
Subject: [PATCH 1/6] Set up inline pass

---
 hercules_opt/src/inline.rs | 11 +++++++++++
 hercules_opt/src/lib.rs    |  2 ++
 hercules_opt/src/pass.rs   | 21 +++++++++++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 hercules_opt/src/inline.rs

diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
new file mode 100644
index 00000000..0d9e3892
--- /dev/null
+++ b/hercules_opt/src/inline.rs
@@ -0,0 +1,11 @@
+extern crate hercules_ir;
+
+use self::hercules_ir::ir::*;
+
+use crate::*;
+
+/*
+ * Top level function to run inlining. Currently, inlines every function call,
+ * since mutual recursion is not valid in Hercules IR.
+ */
+pub fn inline(editors: &mut [FunctionEditor]) {}
diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs
index ff789dd2..bc2e8687 100644
--- a/hercules_opt/src/lib.rs
+++ b/hercules_opt/src/lib.rs
@@ -6,6 +6,7 @@ pub mod editor;
 pub mod fork_guard_elim;
 pub mod forkify;
 pub mod gvn;
+pub mod inline;
 pub mod pass;
 pub mod phi_elim;
 pub mod pred;
@@ -17,6 +18,7 @@ pub use crate::editor::*;
 pub use crate::fork_guard_elim::*;
 pub use crate::forkify::*;
 pub use crate::gvn::*;
+pub use crate::inline::*;
 pub use crate::pass::*;
 pub use crate::phi_elim::*;
 pub use crate::pred::*;
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index f27f1f61..d70e6192 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -31,6 +31,7 @@ pub enum Pass {
     ForkGuardElim,
     Predication,
     SROA,
+    Inline,
     Verify,
     // Parameterized over whether analyses that aid visualization are necessary.
     // Useful to set to false if displaying a potentially broken module.
@@ -464,6 +465,26 @@ impl PassManager {
                     self.legacy_repair_plan();
                     self.clear_analyses();
                 }
+                Pass::Inline => {
+                    self.make_def_uses();
+                    let def_uses = self.def_uses.as_ref().unwrap();
+                    let mut editors: Vec<_> =
+                        zip(self.module.functions.iter_mut(), def_uses.iter())
+                            .map(|(func, def_use)| FunctionEditor::new(func, def_use))
+                            .collect();
+                    inline(&mut editors);
+                    let edits: Vec<_> = editors.into_iter().map(|editor| editor.edits()).collect();
+                    for idx in 0..edits.len() {
+                        if let Some(plans) = self.plans.as_mut() {
+                            repair_plan(&mut plans[idx], &self.module.functions[idx], &edits[idx]);
+                        }
+                        let grave_mapping = self.module.functions[idx].delete_gravestones();
+                        if let Some(plans) = self.plans.as_mut() {
+                            plans[idx].fix_gravestones(&grave_mapping);
+                        }
+                    }
+                    self.clear_analyses();
+                }
                 Pass::Verify => {
                     let (
                         def_uses,
-- 
GitLab


From 953f624f1e5f6b5c6087aee2c19e5eb0ba8d67f1 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 10 Nov 2024 19:28:45 -0600
Subject: [PATCH 2/6] Calculate callgraph

---
 hercules_ir/src/callgraph.rs | 106 +++++++++++++++++++++++++++++++++++
 hercules_ir/src/def_use.rs   |   1 -
 hercules_ir/src/lib.rs       |   2 +
 3 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 hercules_ir/src/callgraph.rs

diff --git a/hercules_ir/src/callgraph.rs b/hercules_ir/src/callgraph.rs
new file mode 100644
index 00000000..84be922d
--- /dev/null
+++ b/hercules_ir/src/callgraph.rs
@@ -0,0 +1,106 @@
+use crate::*;
+
+/*
+ * Custom type for an immutable call graph.
+ */
+#[derive(Debug, Clone)]
+pub struct CallGraph {
+    first_callees: Vec<u32>,
+    callees: Vec<FunctionID>,
+    first_callers: Vec<u32>,
+    callers: Vec<FunctionID>,
+}
+
+impl CallGraph {
+    pub fn num_callees(&self, id: FunctionID) -> usize {
+        if id.idx() + 1 < self.first_callees.len() {
+            self.first_callees[id.idx() + 1] as usize - self.first_callees[id.idx()] as usize
+        } else {
+            self.callees.len() - self.first_callees[id.idx()] as usize
+        }
+    }
+
+    pub fn get_callees(&self, id: FunctionID) -> &[FunctionID] {
+        let first_callee = self.first_callees[id.idx()] as usize;
+        let num_callees = self.num_callees(id) as usize;
+        &self.callees[first_callee..first_callee + num_callees]
+    }
+
+    pub fn num_callers(&self, id: FunctionID) -> usize {
+        if id.idx() + 1 < self.first_callers.len() {
+            self.first_callers[id.idx() + 1] as usize - self.first_callers[id.idx()] as usize
+        } else {
+            self.callers.len() - self.first_callers[id.idx()] as usize
+        }
+    }
+
+    pub fn get_callers(&self, id: FunctionID) -> &[FunctionID] {
+        let first_caller = self.first_callers[id.idx()] as usize;
+        let num_callers = self.num_callers(id) as usize;
+        &self.callers[first_caller..first_caller + num_callers]
+    }
+
+    pub fn num_functions(&self) -> usize {
+        self.first_callees.len()
+    }
+}
+
+/*
+ * Top level function to calculate the call graph of a Hercules module.
+ */
+pub fn callgraph(module: &Module) -> CallGraph {
+    // Step 1: collect the functions called in each function.
+    let callee_functions: Vec<Vec<FunctionID>> = module
+        .functions
+        .iter()
+        .map(|func| {
+            let mut called: Vec<_> = func
+                .nodes
+                .iter()
+                .filter_map(|node| {
+                    if let Node::Call {
+                        control: _,
+                        function,
+                        dynamic_constants: _,
+                        args: _,
+                    } = node
+                    {
+                        Some(*function)
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+            called.sort_unstable();
+            called.dedup();
+            called
+        })
+        .collect();
+
+    // Step 2: collect the functions calling each function.
+    let mut caller_functions = vec![vec![]; callee_functions.len()];
+    for (caller_idx, callees) in callee_functions.iter().enumerate() {
+        let caller_id = FunctionID::new(caller_idx);
+        for callee in callees {
+            caller_functions[callee.idx()].push(caller_id);
+        }
+    }
+
+    // Step 3: pack callee/caller info into CallGraph structure.
+    let mut callgraph = CallGraph {
+        first_callees: vec![],
+        callees: vec![],
+        first_callers: vec![],
+        callers: vec![],
+    };
+    for callees in callee_functions {
+        callgraph.first_callees.push(callgraph.callees.len() as u32);
+        callgraph.callees.extend(callees);
+    }
+    for callers in caller_functions {
+        callgraph.first_callers.push(callgraph.callers.len() as u32);
+        callgraph.callers.extend(callers);
+    }
+
+    callgraph
+}
diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs
index 3cca0e75..d7c7b3a1 100644
--- a/hercules_ir/src/def_use.rs
+++ b/hercules_ir/src/def_use.rs
@@ -64,7 +64,6 @@ pub fn def_use(function: &Function) -> ImmutableDefUseMap {
         }
     }
 
-    // Step 5: pack and return.
     ImmutableDefUseMap { first_edges, users }
 }
 
diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs
index 8dd42d6c..f7277cfa 100644
--- a/hercules_ir/src/lib.rs
+++ b/hercules_ir/src/lib.rs
@@ -8,6 +8,7 @@
 
 pub mod antideps;
 pub mod build;
+pub mod callgraph;
 pub mod dataflow;
 pub mod def_use;
 pub mod dom;
@@ -23,6 +24,7 @@ pub mod verify;
 
 pub use crate::antideps::*;
 pub use crate::build::*;
+pub use crate::callgraph::*;
 pub use crate::dataflow::*;
 pub use crate::def_use::*;
 pub use crate::dom::*;
-- 
GitLab


From 9000849a37edc0d6c47a8a0108f3aebc9fcef0ef Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 10 Nov 2024 19:33:18 -0600
Subject: [PATCH 3/6] Get callgraph to inline

---
 hercules_opt/src/inline.rs |  9 ++++++++-
 hercules_opt/src/pass.rs   | 15 ++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index 0d9e3892..dce0ead7 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -1,6 +1,8 @@
 extern crate hercules_ir;
 
+use self::hercules_ir::callgraph::*;
 use self::hercules_ir::ir::*;
+use self::hercules_ir::schedule::*;
 
 use crate::*;
 
@@ -8,4 +10,9 @@ use crate::*;
  * Top level function to run inlining. Currently, inlines every function call,
  * since mutual recursion is not valid in Hercules IR.
  */
-pub fn inline(editors: &mut [FunctionEditor]) {}
+pub fn inline(
+    editors: &mut [FunctionEditor],
+    callgraph: &CallGraph,
+    plans: Option<&mut Vec<Plan>>,
+) {
+}
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index d70e6192..6ede12d4 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -66,6 +66,7 @@ pub struct PassManager {
     pub loops: Option<Vec<LoopTree>>,
     pub antideps: Option<Vec<Vec<(NodeID, NodeID)>>>,
     pub bbs: Option<Vec<Vec<NodeID>>>,
+    pub callgraph: Option<CallGraph>,
 
     // Current plan.
     pub plans: Option<Vec<Plan>>,
@@ -90,6 +91,7 @@ impl PassManager {
             loops: None,
             antideps: None,
             bbs: None,
+            callgraph: None,
             plans: None,
             manifests: None,
         }
@@ -280,6 +282,12 @@ impl PassManager {
         }
     }
 
+    pub fn make_callgraph(&mut self) {
+        if self.callgraph.is_none() {
+            self.callgraph = Some(callgraph(&self.module));
+        }
+    }
+
     pub fn make_plans(&mut self) {
         if self.plans.is_none() {
             self.make_def_uses();
@@ -467,12 +475,16 @@ impl PassManager {
                 }
                 Pass::Inline => {
                     self.make_def_uses();
+                    self.make_callgraph();
                     let def_uses = self.def_uses.as_ref().unwrap();
+                    let callgraph = self.callgraph.as_ref().unwrap();
                     let mut editors: Vec<_> =
                         zip(self.module.functions.iter_mut(), def_uses.iter())
                             .map(|(func, def_use)| FunctionEditor::new(func, def_use))
                             .collect();
-                    inline(&mut editors);
+                    // Inlining is special in that it may modify partitions in an
+                    // inter-procedural fashion.
+                    inline(&mut editors, callgraph, self.plans.as_mut());
                     let edits: Vec<_> = editors.into_iter().map(|editor| editor.edits()).collect();
                     for idx in 0..edits.len() {
                         if let Some(plans) = self.plans.as_mut() {
@@ -667,6 +679,7 @@ impl PassManager {
         self.loops = None;
         self.antideps = None;
         self.bbs = None;
+        self.callgraph = None;
 
         // Don't clear the plan - this is repaired, not reconstructed.
     }
-- 
GitLab


From e2a1434def87a4650acdfb88d8b568c137bdd73e Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Sun, 10 Nov 2024 23:46:38 -0600
Subject: [PATCH 4/6] Iterate function in topo order and hacks to get the
 necessary refs

---
 hercules_cg/src/cpu.rs            |  3 ++
 hercules_cg/src/sched_dot.rs      |  4 +-
 hercules_cg/src/sched_schedule.rs |  4 ++
 hercules_ir/src/ir.rs             | 11 +++--
 hercules_opt/src/inline.rs        | 82 ++++++++++++++++++++++++++++++-
 5 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
index 8b5df931..eaa7374b 100644
--- a/hercules_cg/src/cpu.rs
+++ b/hercules_cg/src/cpu.rs
@@ -1,4 +1,5 @@
 extern crate bitvec;
+extern crate hercules_ir;
 
 use std::cell::{Cell, RefCell};
 use std::collections::{HashMap, HashSet, VecDeque};
@@ -7,6 +8,8 @@ use std::iter::once;
 
 use self::bitvec::prelude::*;
 
+use self::hercules_ir::*;
+
 use crate::*;
 
 /*
diff --git a/hercules_cg/src/sched_dot.rs b/hercules_cg/src/sched_dot.rs
index b997138d..f0446189 100644
--- a/hercules_cg/src/sched_dot.rs
+++ b/hercules_cg/src/sched_dot.rs
@@ -1,4 +1,5 @@
 extern crate bitvec;
+extern crate hercules_ir;
 extern crate rand;
 
 use std::collections::{HashMap, VecDeque};
@@ -9,9 +10,10 @@ use std::io::Write as _;
 use std::process::Command;
 
 use self::bitvec::prelude::*;
-
 use self::rand::Rng;
 
+use self::hercules_ir::*;
+
 use crate::*;
 
 /*
diff --git a/hercules_cg/src/sched_schedule.rs b/hercules_cg/src/sched_schedule.rs
index 5300b990..16720bbc 100644
--- a/hercules_cg/src/sched_schedule.rs
+++ b/hercules_cg/src/sched_schedule.rs
@@ -1,6 +1,10 @@
+extern crate hercules_ir;
+
 use std::collections::{HashMap, HashSet, VecDeque};
 use std::iter::{empty, once, zip};
 
+use self::hercules_ir::*;
+
 use crate::*;
 
 /*
diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs
index 9991004f..5cf549a8 100644
--- a/hercules_ir/src/ir.rs
+++ b/hercules_ir/src/ir.rs
@@ -1468,6 +1468,11 @@ impl Intrinsic {
  * Rust things to make newtyped IDs usable.
  */
 
+pub trait ID: Clone + Eq + Ord + std::hash::Hash + Copy {
+    fn new(x: usize) -> Self;
+    fn idx(&self) -> usize;
+}
+
 #[macro_export]
 macro_rules! define_id_type {
     ($x: ident) => {
@@ -1486,12 +1491,12 @@ macro_rules! define_id_type {
         )]
         pub struct $x(u32);
 
-        impl $x {
-            pub fn new(x: usize) -> Self {
+        impl ID for $x {
+            fn new(x: usize) -> Self {
                 $x(x as u32)
             }
 
-            pub fn idx(&self) -> usize {
+            fn idx(&self) -> usize {
                 self.0 as usize
             }
         }
diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index dce0ead7..7f592f9b 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -1,5 +1,7 @@
 extern crate hercules_ir;
 
+use std::collections::HashMap;
+
 use self::hercules_ir::callgraph::*;
 use self::hercules_ir::ir::*;
 use self::hercules_ir::schedule::*;
@@ -13,6 +15,84 @@ use crate::*;
 pub fn inline(
     editors: &mut [FunctionEditor],
     callgraph: &CallGraph,
-    plans: Option<&mut Vec<Plan>>,
+    mut plans: Option<&mut Vec<Plan>>,
 ) {
+    // Step 1: run topological sort on the call graph to inline the "deepest"
+    // function first. Mutual recursion is not currently supported, so assert
+    // that a topological sort exists.
+    let mut num_calls: Vec<usize> = (0..editors.len())
+        .map(|idx| callgraph.num_callees(FunctionID::new(idx)))
+        .collect();
+    let mut no_calls_stack: Vec<FunctionID> = num_calls
+        .iter()
+        .enumerate()
+        .filter(|(_, num)| **num == 0)
+        .map(|(idx, _)| FunctionID::new(idx))
+        .collect();
+    let mut topo = vec![];
+    while let Some(no_call_func) = no_calls_stack.pop() {
+        topo.push(no_call_func);
+        for caller in callgraph.get_callers(no_call_func) {
+            num_calls[caller.idx()] -= 1;
+            if num_calls[caller.idx()] == 0 {
+                no_calls_stack.push(*caller);
+            }
+        }
+    }
+    assert_eq!(
+        topo.len(),
+        editors.len(),
+        "PANIC: Found mutual recursion in Hercules IR."
+    );
+
+    // Step 2: run inlining on each function individually. Iterate the functions
+    // in topological order.
+    for to_inline_id in topo {
+        // Since Rust cannot analyze the accesses into an array of mutable
+        // references, we need to do some weirdness here to simultaneously get:
+        // 1. A mutable reference to the function we're modifying.
+        // 2. Shared references to all of the functions called by that function.
+        // We need to get the same for plans, if we receive them.
+        let callees = callgraph.get_callees(to_inline_id);
+        let editor_refs = get_mut_and_immuts(editors, to_inline_id, callees);
+        let plan_refs = plans
+            .as_mut()
+            .map(|plans| get_mut_and_immuts(*plans, to_inline_id, callees));
+    }
+}
+
+/*
+ * Helper function to get from an array of mutable references:
+ * 1. A single mutable reference.
+ * 2. Several shared references.
+ * Where none of the references alias. We need to use this both for function
+ * editors and plans.
+ */
+fn get_mut_and_immuts<'a, T, I: ID>(
+    mut_refs: &'a mut [T],
+    mut_id: I,
+    shared_id: &[I],
+) -> (&'a mut T, HashMap<I, &'a T>) {
+    let mut all_id = Vec::from(shared_id);
+    all_id.sort_unstable();
+    all_id.insert(all_id.binary_search(&mut_id).unwrap_err(), mut_id);
+    let mut mut_ref = None;
+    let mut shared_refs = HashMap::new();
+    let mut cursor = 0;
+    let mut slice = &mut *mut_refs;
+    for id in all_id {
+        let (left, right) = slice.split_at_mut(id.idx() - cursor);
+        cursor += left.len() + 1;
+        let (left, right) = right.split_at_mut(1);
+        let item = &mut left[0];
+        if id == mut_id {
+            assert!(mut_ref.is_none());
+            mut_ref = Some(item);
+        } else {
+            shared_refs.insert(id, &*item);
+        }
+        slice = right;
+    }
+
+    (mut_ref.unwrap(), shared_refs)
 }
-- 
GitLab


From 1f0c1a4a8c3704dd69175887013f889a82aa6432 Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Mon, 11 Nov 2024 17:08:05 -0600
Subject: [PATCH 5/6] Skeleton of function to run inlining on single func,
 utility to make function have only one return node

---
 hercules_opt/src/inline.rs | 72 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index 7f592f9b..3f6fd042 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -3,6 +3,7 @@ extern crate hercules_ir;
 use std::collections::HashMap;
 
 use self::hercules_ir::callgraph::*;
+use self::hercules_ir::def_use::*;
 use self::hercules_ir::ir::*;
 use self::hercules_ir::schedule::*;
 
@@ -58,6 +59,7 @@ pub fn inline(
         let plan_refs = plans
             .as_mut()
             .map(|plans| get_mut_and_immuts(*plans, to_inline_id, callees));
+        inline_func(editor_refs.0, editor_refs.1, plan_refs);
     }
 }
 
@@ -96,3 +98,73 @@ fn get_mut_and_immuts<'a, T, I: ID>(
 
     (mut_ref.unwrap(), shared_refs)
 }
+
+/*
+ * Run inlining on a single function. Pass a mutable reference to the function
+ * to modify and shared references for all called functions.
+ */
+fn inline_func(
+    editor: &mut FunctionEditor,
+    called: HashMap<FunctionID, &FunctionEditor>,
+    plans: Option<(&mut Plan, HashMap<FunctionID, &Plan>)>,
+) {
+    let first_num_nodes = editor.func().nodes.len();
+    for id in (0..first_num_nodes).map(NodeID::new) {
+        let Node::Call {
+            control,
+            function,
+            dynamic_constants,
+            args,
+        } = &editor.func().nodes[id.idx()]
+        else {
+            continue;
+        };
+
+        let old_num_nodes = editor.func().nodes.len();
+        let old_id_to_new_id = |old_id: NodeID| NodeID::new(old_id.idx() + old_num_nodes);
+        let called_func = called[function].func();
+
+        let success = editor.edit(|edit| todo!());
+    }
+}
+
+/*
+ * Top level function to make a function have only a single return.
+ */
+pub fn collapse_returns(editor: &mut FunctionEditor) {
+    let returns: Vec<NodeID> = (0..editor.func().nodes.len())
+        .filter(|idx| editor.func().nodes[*idx].is_return())
+        .map(NodeID::new)
+        .collect();
+    assert!(!returns.is_empty());
+    if returns.len() == 1 {
+        return;
+    }
+    let preds_before_returns: Vec<NodeID> = returns
+        .iter()
+        .map(|ret_id| get_uses(&editor.func().nodes[ret_id.idx()]).as_ref()[0])
+        .collect();
+    let data_to_return: Vec<NodeID> = returns
+        .iter()
+        .map(|ret_id| get_uses(&editor.func().nodes[ret_id.idx()]).as_ref()[1])
+        .collect();
+
+    // All of the old returns get replaced in a single edit.
+    editor.edit(|mut edit| {
+        let region = edit.add_node(Node::Region {
+            preds: preds_before_returns.into_boxed_slice(),
+        });
+        let phi = edit.add_node(Node::Phi {
+            control: region,
+            data: data_to_return.into_boxed_slice(),
+        });
+        edit.add_node(Node::Return {
+            control: region,
+            data: phi,
+        });
+        for ret in returns {
+            edit = edit.delete_node(ret)?;
+        }
+        Ok(edit)
+    });
+}
-- 
GitLab


From bebbe6fa6b5b1aa567e6a6f211306341657b0c1d Mon Sep 17 00:00:00 2001
From: Russel Arbore <russel.jma@gmail.com>
Date: Mon, 11 Nov 2024 20:37:57 -0600
Subject: [PATCH 6/6] Simple example of inlining (without partitions) works

---
 hercules_opt/src/editor.rs |   8 ++-
 hercules_opt/src/inline.rs | 118 +++++++++++++++++++++++++++++++++----
 2 files changed, 111 insertions(+), 15 deletions(-)

diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs
index 1b182c57..256c1eba 100644
--- a/hercules_opt/src/editor.rs
+++ b/hercules_opt/src/editor.rs
@@ -199,6 +199,10 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
         }
     }
 
+    fn is_mutable(&self, id: NodeID) -> bool {
+        id.idx() >= self.editor.mutable_nodes.len() || self.editor.mutable_nodes[id.idx()]
+    }
+
     pub fn add_node(&mut self, node: Node) -> NodeID {
         let id = NodeID::new(self.editor.function.nodes.len() + self.added.len());
         // Added nodes need to have an entry in the def-use map.
@@ -218,7 +222,7 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
     pub fn delete_node(mut self, id: NodeID) -> Result<Self, Self> {
         // We can only delete mutable nodes. Return None if we try to modify an
         // immutable node, as it means the whole edit should be aborted.
-        if self.editor.mutable_nodes[id.idx()] {
+        if self.is_mutable(id) {
             assert!(
                 !self.added.contains(&id),
                 "PANIC: Please don't delete a node that was added in the same edit."
@@ -243,7 +247,7 @@ impl<'a, 'b> FunctionEdit<'a, 'b> {
         // We can only replace uses of mutable nodes. Return None if we try to
         // replace uses of an immutable node, as it means the whole edit should
         // be aborted.
-        if self.editor.mutable_nodes[old.idx()] {
+        if self.is_mutable(old) {
             // Update all of the users of the old node.
             self.ensure_updated_def_use_entry(old);
             for user_id in self.updated_def_use[&old].iter() {
diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs
index 3f6fd042..bd192c39 100644
--- a/hercules_opt/src/inline.rs
+++ b/hercules_opt/src/inline.rs
@@ -46,7 +46,15 @@ pub fn inline(
         "PANIC: Found mutual recursion in Hercules IR."
     );
 
-    // Step 2: run inlining on each function individually. Iterate the functions
+    // Step 2: make sure each function has a single return node. If an edit
+    // failed to make a function have a single return node, then we can't inline
+    // calls of it.
+    let single_return_nodes: Vec<_> = editors
+        .iter_mut()
+        .map(|editor| collapse_returns(editor))
+        .collect();
+
+    // Step 3: run inlining on each function individually. Iterate the functions
     // in topological order.
     for to_inline_id in topo {
         // Since Rust cannot analyze the accesses into an array of mutable
@@ -59,7 +67,12 @@ pub fn inline(
         let plan_refs = plans
             .as_mut()
             .map(|plans| get_mut_and_immuts(*plans, to_inline_id, callees));
-        inline_func(editor_refs.0, editor_refs.1, plan_refs);
+        inline_func(
+            editor_refs.0,
+            editor_refs.1,
+            plan_refs,
+            &single_return_nodes,
+        );
     }
 }
 
@@ -107,38 +120,115 @@ fn inline_func(
     editor: &mut FunctionEditor,
     called: HashMap<FunctionID, &FunctionEditor>,
     plans: Option<(&mut Plan, HashMap<FunctionID, &Plan>)>,
+    single_return_nodes: &Vec<Option<NodeID>>,
 ) {
     let first_num_nodes = editor.func().nodes.len();
     for id in (0..first_num_nodes).map(NodeID::new) {
+        // Break down the call node.
         let Node::Call {
             control,
             function,
-            dynamic_constants,
-            args,
-        } = &editor.func().nodes[id.idx()]
+            ref dynamic_constants,
+            ref args,
+        } = editor.func().nodes[id.idx()]
         else {
             continue;
         };
 
+        // Assemble all the info we'll need to do the edit.
+        let dcs = dynamic_constants.clone();
+        assert!(
+            dcs.is_empty(),
+            "TODO: Implement inlining dynamic constant arguments."
+        );
+        let args = args.clone();
         let old_num_nodes = editor.func().nodes.len();
         let old_id_to_new_id = |old_id: NodeID| NodeID::new(old_id.idx() + old_num_nodes);
-        let called_func = called[function].func();
+        let call_pred = get_uses(&editor.func().nodes[control.idx()]);
+        assert_eq!(call_pred.as_ref().len(), 1);
+        let call_pred = call_pred.as_ref()[0];
+        let called_func = called[&function].func();
+        // We can't inline calls to functions with multiple returns.
+        let Some(called_return) = single_return_nodes[function.idx()] else {
+            continue;
+        };
+        let called_return_uses = get_uses(&called_func.nodes[called_return.idx()]);
+        let called_return_pred = called_return_uses.as_ref()[0];
+        let called_return_data = called_return_uses.as_ref()[1];
 
-        let success = editor.edit(|edit| todo!());
+        // Perform the actual edit.
+        let success = editor.edit(|mut edit| {
+            // Insert the nodes from the called function. There are a few
+            // special cases:
+            // - Start: don't add start nodes - later, we'll replace_all_uses on
+            //   the start node with the one predecessor of the call's region
+            //   node.
+            // - Parameter: don't add parameter nodes - later, we'll
+            //   replace_all_uses on the parameter nodes with the arguments to
+            //   the call node.
+            // - Return: don't add return nodes - later, we'll replace_all_uses
+            //   on the call's region node with the predecessor to the return
+            //   node.
+            for (idx, node) in called_func.nodes.iter().enumerate() {
+                if node.is_start() || node.is_parameter() || node.is_return() {
+                    // We still need to add some node to make sure the IDs line
+                    // up. Just add a gravestone.
+                    edit.add_node(Node::Start);
+                    continue;
+                }
+                // Get the node from the callee function and replace all the
+                // uses with the to-be IDs in the caller function.
+                let mut node = node.clone();
+                let mut uses = get_uses_mut(&mut node);
+                for u in uses.as_mut() {
+                    **u = old_id_to_new_id(**u);
+                }
+                // Add the node and check that the IDs line up.
+                let add_id = edit.add_node(node);
+                assert_eq!(add_id, old_id_to_new_id(NodeID::new(idx)));
+            }
+
+            // Stitch the control use of the inlined start node with the
+            // predecessor control node of the call's region.
+            let start_node = &called_func.nodes[0];
+            assert!(start_node.is_start());
+            let start_id = old_id_to_new_id(NodeID::new(0));
+            edit = edit.replace_all_uses(start_id, call_pred)?;
+
+            // Stitch the control use of the original call node's region with
+            // the predecessor control of the inlined function's return.
+            edit = edit.replace_all_uses(control, called_return_pred)?;
+
+            // Stitch uses of parameter nodes in the inlined function to the IDs
+            // of arguments provided to the call node.
+            for (node_idx, node) in called_func.nodes.iter().enumerate() {
+                if let Node::Parameter { index } = node {
+                    let param_id = old_id_to_new_id(NodeID::new(node_idx));
+                    edit = edit.replace_all_uses(param_id, args[*index])?;
+                }
+            }
+
+            // Finally, reroute the call's users and delete the call and region.
+            edit = edit.replace_all_uses(id, old_id_to_new_id(called_return_data))?;
+            edit = edit.delete_node(control)?;
+            edit = edit.delete_node(id)?;
+
+            Ok(edit)
+        });
     }
 }
 
 /*
  * Top level function to make a function have only a single return.
  */
-pub fn collapse_returns(editor: &mut FunctionEditor) {
+pub fn collapse_returns(editor: &mut FunctionEditor) -> Option<NodeID> {
     let returns: Vec<NodeID> = (0..editor.func().nodes.len())
         .filter(|idx| editor.func().nodes[*idx].is_return())
         .map(NodeID::new)
         .collect();
     assert!(!returns.is_empty());
     if returns.len() == 1 {
-        return;
+        return Some(returns[0]);
     }
     let preds_before_returns: Vec<NodeID> = returns
         .iter()
@@ -150,6 +240,7 @@ pub fn collapse_returns(editor: &mut FunctionEditor) {
         .collect();
 
     // All of the old returns get replaced in a single edit.
+    let mut new_return = None;
     editor.edit(|mut edit| {
         let region = edit.add_node(Node::Region {
             preds: preds_before_returns.into_boxed_slice(),
@@ -158,13 +249,14 @@ pub fn collapse_returns(editor: &mut FunctionEditor) {
             control: region,
             data: data_to_return.into_boxed_slice(),
         });
-        edit.add_node(Node::Return {
-            control: region,
-            data: phi,
-        });
         for ret in returns {
             edit = edit.delete_node(ret)?;
         }
+        new_return = Some(edit.add_node(Node::Return {
+            control: region,
+            data: phi,
+        }));
         Ok(edit)
     });
+    new_return
 }
-- 
GitLab