From 4057a1324d70f607ba975905475a6a3a2b91c8a1 Mon Sep 17 00:00:00 2001
From: rarbore2 <rarbore2@illinois.edu>
Date: Sun, 10 Nov 2024 17:55:17 -0600
Subject: [PATCH] Misc. changes and fixes for calls

---
 hercules_cg/src/cpu.rs            |  23 +-
 hercules_cg/src/manifest.rs       |  15 +-
 hercules_cg/src/sched_gen.rs      |  18 +-
 hercules_ir/src/build.rs          |  37 ++-
 hercules_ir/src/def_use.rs        |  14 +-
 hercules_ir/src/dot.rs            |   2 +
 hercules_ir/src/gcm.rs            |  22 +-
 hercules_ir/src/ir.rs             |  23 +-
 hercules_ir/src/parse.rs          |   4 +-
 hercules_ir/src/schedule.rs       |   5 +-
 hercules_ir/src/typecheck.rs      |   7 +-
 hercules_ir/src/verify.rs         |  31 +-
 hercules_opt/src/ccp.rs           |   8 +-
 hercules_opt/src/editor.rs        |  34 ++-
 hercules_opt/src/pass.rs          |  11 +-
 hercules_opt/src/phi_elim.rs      |  25 +-
 hercules_opt/src/sroa.rs          |   1 +
 hercules_samples/call.hir         |   7 +-
 hercules_samples/fac/fac.hir      |   7 +-
 juno_frontend/examples/simple1.jn |   6 +-
 juno_frontend/src/codegen.rs      | 455 ++++++++++++++++--------------
 juno_frontend/src/main.rs         |  19 +-
 22 files changed, 507 insertions(+), 267 deletions(-)

diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
index 915974bc..8b5df931 100644
--- a/hercules_cg/src/cpu.rs
+++ b/hercules_cg/src/cpu.rs
@@ -674,6 +674,28 @@ impl<'a> CPUContext<'a> {
                     }
                 }
             }
+            SInst::ProductExtract { product, indices } => {
+                emit_assign(w)?;
+                write!(w, "extractvalue ")?;
+                self.emit_svalue(product, true, w)?;
+                for index in indices {
+                    write!(w, ", {}", index)?;
+                }
+            }
+            SInst::ProductInsert {
+                product,
+                data,
+                indices,
+            } => {
+                emit_assign(w)?;
+                write!(w, "insertvalue ")?;
+                self.emit_svalue(product, true, w)?;
+                write!(w, ", ")?;
+                self.emit_svalue(data, true, w)?;
+                for index in indices {
+                    write!(w, ", {}", index)?;
+                }
+            }
             SInst::ArrayLoad {
                 array,
                 position,
@@ -752,7 +774,6 @@ impl<'a> CPUContext<'a> {
                     write!(w, ", ptr %store_ptr_{}", virt_reg)?;
                 }
             }
-            _ => {}
         }
         write!(w, "\n")?;
 
diff --git a/hercules_cg/src/manifest.rs b/hercules_cg/src/manifest.rs
index 7f13c4b4..f7161bed 100644
--- a/hercules_cg/src/manifest.rs
+++ b/hercules_cg/src/manifest.rs
@@ -138,19 +138,20 @@ impl PartitionManifest {
 }
 
 impl DeviceManifest {
-    pub fn default(device: Device) -> Self {
-        match device {
-            Device::CPU => DeviceManifest::CPU {
-                parallel_launch: Box::new([]),
-            },
-            Device::GPU => todo!(),
+    pub fn cpu() -> Self {
+        DeviceManifest::CPU {
+            parallel_launch: Box::new([]),
         }
     }
 
+    pub fn gpu() -> Self {
+        DeviceManifest::GPU
+    }
+
     pub fn num_parallel_launch_dims(&self) -> usize {
         match self {
             DeviceManifest::CPU { parallel_launch } => parallel_launch.len(),
-            DeviceManifest::GPU => 0,
+            _ => panic!(),
         }
     }
 }
diff --git a/hercules_cg/src/sched_gen.rs b/hercules_cg/src/sched_gen.rs
index 4f9decce..f065340c 100644
--- a/hercules_cg/src/sched_gen.rs
+++ b/hercules_cg/src/sched_gen.rs
@@ -28,6 +28,12 @@ pub fn sched_compile(
     let stypes = convert_to_sched_ir_types(&module.types);
     verify_constants_well_formed_for_sched_ir(&module.constants);
     let sconstants = convert_to_sched_ir_constants(&module.constants);
+    let function_names: HashMap<FunctionID, String> = module
+        .functions
+        .iter()
+        .enumerate()
+        .map(|(idx, function)| (FunctionID::new(idx), function.name.clone()))
+        .collect();
 
     let mut functions = HashMap::new();
     let mut manifests = HashMap::new();
@@ -47,6 +53,7 @@ pub fn sched_compile(
             &plans[idx],
             &stypes,
             &sconstants,
+            &function_names,
         )
         .compile_function();
 
@@ -190,6 +197,7 @@ struct FunctionContext<'a> {
     plan: &'a Plan,
     stypes: &'a Vec<SType>,
     sconstants: &'a Vec<SConstant>,
+    function_names: &'a HashMap<FunctionID, String>,
 
     top_nodes: Vec<NodeID>,
     partition_graph: Subgraph,
@@ -216,6 +224,7 @@ impl<'a> FunctionContext<'a> {
         plan: &'a Plan,
         stypes: &'a Vec<SType>,
         sconstants: &'a Vec<SConstant>,
+        function_names: &'a HashMap<FunctionID, String>,
     ) -> Self {
         let inverted_partition_map = plan.invert_partition_map();
         let top_nodes = plan.compute_top_nodes(function, control_subgraph, &inverted_partition_map);
@@ -240,6 +249,7 @@ impl<'a> FunctionContext<'a> {
             plan,
             stypes,
             sconstants,
+            function_names,
 
             top_nodes,
             partition_graph,
@@ -429,12 +439,18 @@ impl<'a> FunctionContext<'a> {
                     }
                 }
 
+                let device = match self.plan.partition_devices[partition_idx] {
+                    Device::CPU => DeviceManifest::cpu(),
+                    Device::GPU => DeviceManifest::gpu(),
+                    Device::AsyncRust => todo!(),
+                };
+
                 PartitionManifest {
                     name,
                     parameters,
                     returns,
                     successors,
-                    device: DeviceManifest::default(self.plan.partition_devices[partition_idx]),
+                    device,
                 }
             })
             .collect();
diff --git a/hercules_ir/src/build.rs b/hercules_ir/src/build.rs
index 4202a274..ad130c1b 100644
--- a/hercules_ir/src/build.rs
+++ b/hercules_ir/src/build.rs
@@ -399,28 +399,43 @@ impl<'a> Builder<'a> {
         self.intern_dynamic_constant(DynamicConstant::Parameter(val))
     }
 
-    pub fn create_dynamic_constant_add(&mut self, x : DynamicConstantID,
-                                       y : DynamicConstantID) -> DynamicConstantID {
+    pub fn create_dynamic_constant_add(
+        &mut self,
+        x: DynamicConstantID,
+        y: DynamicConstantID,
+    ) -> DynamicConstantID {
         self.intern_dynamic_constant(DynamicConstant::Add(x, y))
     }
 
-    pub fn create_dynamic_constant_sub(&mut self, x : DynamicConstantID,
-                                       y : DynamicConstantID) -> DynamicConstantID {
+    pub fn create_dynamic_constant_sub(
+        &mut self,
+        x: DynamicConstantID,
+        y: DynamicConstantID,
+    ) -> DynamicConstantID {
         self.intern_dynamic_constant(DynamicConstant::Sub(x, y))
     }
 
-    pub fn create_dynamic_constant_mul(&mut self, x : DynamicConstantID,
-                                       y : DynamicConstantID) -> DynamicConstantID {
+    pub fn create_dynamic_constant_mul(
+        &mut self,
+        x: DynamicConstantID,
+        y: DynamicConstantID,
+    ) -> DynamicConstantID {
         self.intern_dynamic_constant(DynamicConstant::Mul(x, y))
     }
 
-    pub fn create_dynamic_constant_div(&mut self, x : DynamicConstantID,
-                                       y : DynamicConstantID) -> DynamicConstantID {
+    pub fn create_dynamic_constant_div(
+        &mut self,
+        x: DynamicConstantID,
+        y: DynamicConstantID,
+    ) -> DynamicConstantID {
         self.intern_dynamic_constant(DynamicConstant::Div(x, y))
     }
 
-    pub fn create_dynamic_constant_rem(&mut self, x: DynamicConstantID,
-                                       y: DynamicConstantID) -> DynamicConstantID {
+    pub fn create_dynamic_constant_rem(
+        &mut self,
+        x: DynamicConstantID,
+        y: DynamicConstantID,
+    ) -> DynamicConstantID {
         self.intern_dynamic_constant(DynamicConstant::Rem(x, y))
     }
 
@@ -561,11 +576,13 @@ impl NodeBuilder {
 
     pub fn build_call(
         &mut self,
+        control: NodeID,
         function: FunctionID,
         dynamic_constants: Box<[DynamicConstantID]>,
         args: Box<[NodeID]>,
     ) {
         self.node = Node::Call {
+            control,
             function,
             dynamic_constants,
             args,
diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs
index eff103ee..3cca0e75 100644
--- a/hercules_ir/src/def_use.rs
+++ b/hercules_ir/src/def_use.rs
@@ -170,10 +170,15 @@ pub fn get_uses(node: &Node) -> NodeUses {
             op: _,
         } => NodeUses::Three([*first, *second, *third]),
         Node::Call {
+            control,
             function: _,
             dynamic_constants: _,
             args,
-        } => NodeUses::Variable(args.clone()),
+        } => {
+            let mut uses = vec![*control];
+            uses.extend(args);
+            NodeUses::Variable(uses.into_boxed_slice())
+        }
         Node::IntrinsicCall { intrinsic: _, args } => NodeUses::Variable(args.clone()),
         Node::Read { collect, indices } => {
             let mut uses = vec![];
@@ -262,10 +267,15 @@ pub fn get_uses_mut<'a>(node: &'a mut Node) -> NodeUsesMut<'a> {
             op: _,
         } => NodeUsesMut::Three([first, second, third]),
         Node::Call {
+            control,
             function: _,
             dynamic_constants: _,
             args,
-        } => NodeUsesMut::Variable(args.iter_mut().collect()),
+        } => {
+            let mut uses = vec![control];
+            uses.extend(args);
+            NodeUsesMut::Variable(uses.into_boxed_slice())
+        }
         Node::IntrinsicCall { intrinsic: _, args } => {
             NodeUsesMut::Variable(args.iter_mut().collect())
         }
diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs
index 97520382..8c36c8ad 100644
--- a/hercules_ir/src/dot.rs
+++ b/hercules_ir/src/dot.rs
@@ -81,6 +81,7 @@ pub fn write_dot<W: Write>(
             let partition_color = plan.map(|plan| match plan.partition_devices[partition_idx] {
                 Device::CPU => "lightblue",
                 Device::GPU => "darkseagreen",
+                Device::AsyncRust => "plum2",
             });
             if let Some(partition_color) = partition_color {
                 write_partition_header(function_id, partition_idx, module, partition_color, w)?;
@@ -315,6 +316,7 @@ fn write_node<W: Write>(
         Node::Constant { id } => module.write_constant(*id, &mut suffix)?,
         Node::DynamicConstant { id } => module.write_dynamic_constant(*id, &mut suffix)?,
         Node::Call {
+            control: _,
             function,
             dynamic_constants,
             args: _,
diff --git a/hercules_ir/src/gcm.rs b/hercules_ir/src/gcm.rs
index 44d89ce8..394c596b 100644
--- a/hercules_ir/src/gcm.rs
+++ b/hercules_ir/src/gcm.rs
@@ -18,7 +18,7 @@ pub fn gcm(
     antideps: &Vec<(NodeID, NodeID)>,
     loops: &LoopTree,
     fork_join_map: &HashMap<NodeID, NodeID>,
-    partial_partition: &Vec<Option<PartitionID>>,
+    partial_partition: &mut Vec<Option<PartitionID>>,
 ) -> Vec<NodeID> {
     let mut bbs: Vec<Option<NodeID>> = vec![None; function.nodes.len()];
 
@@ -37,6 +37,12 @@ pub fn gcm(
                 init: _,
                 reduct: _,
             } => bbs[idx] = Some(control),
+            Node::Call {
+                control,
+                function: _,
+                dynamic_constants: _,
+                args: _,
+            } => bbs[idx] = Some(control),
             Node::Parameter { index: _ } => bbs[idx] = Some(NodeID::new(0)),
             Node::Constant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
             Node::DynamicConstant { id: _ } => bbs[idx] = Some(NodeID::new(0)),
@@ -178,8 +184,20 @@ pub fn gcm(
         };
 
         // Look between the LCA and the schedule early location to place the
-        // node.
+        // node. If a data node can't be scheduled to any control nodes in its
+        // partition (this may happen if all of the control nodes in a partition
+        // got deleted, for example), then the data node can be scheduled into a
+        // control node in a different partition.
         let schedule_early = schedule_early[id.idx()].unwrap();
+        let need_to_repartition = !dom
+            .chain(lca.unwrap_or(schedule_early), schedule_early)
+            .any(|dominator| {
+                partial_partition[id.idx()].is_none()
+                    || partial_partition[dominator.idx()] == partial_partition[id.idx()]
+            });
+        if need_to_repartition {
+            partial_partition[id.idx()] = None;
+        }
         let mut chain = dom
             // If the node has no users, then it doesn't really matter where we
             // place it - just place it at the early placement.
diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs
index 7b96f6b0..9991004f 100644
--- a/hercules_ir/src/ir.rs
+++ b/hercules_ir/src/ir.rs
@@ -136,15 +136,8 @@ pub enum Index {
 }
 
 /*
- * Hercules IR is a combination of a possibly cylic control flow graph, and
- * many possibly cyclic data flow graphs. Each node represents some operation on
- * input values (including control), and produces some output value. Operations
- * that conceptually produce multiple outputs (such as an if node) produce a
- * product type instead. For example, the if node produces prod(control(N),
- * control(N)), where the first control token represents the false branch, and
- * the second control token represents the true branch. Functions are devoid of
- * side effects, so call nodes don't take as input or output control tokens.
- * There is also no global memory - use arrays.
+ * Hercules IR is a single flow graph per function - this flow graph mixes data
+ * and control flow.
  */
 #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub enum Node {
@@ -209,6 +202,7 @@ pub enum Node {
         op: TernaryOperator,
     },
     Call {
+        control: NodeID,
         function: FunctionID,
         dynamic_constants: Box<[DynamicConstantID]>,
         args: Box<[NodeID]>,
@@ -974,6 +968,15 @@ impl Node {
     define_pattern_predicate!(is_parameter, Node::Parameter { index: _ });
     define_pattern_predicate!(is_constant, Node::Constant { id: _ });
     define_pattern_predicate!(is_dynamic_constant, Node::DynamicConstant { id: _ });
+    define_pattern_predicate!(
+        is_call,
+        Node::Call {
+            control: _,
+            function: _,
+            dynamic_constants: _,
+            args: _
+        }
+    );
     define_pattern_predicate!(
         is_read,
         Node::Read {
@@ -1168,6 +1171,7 @@ impl Node {
                 op,
             } => op.upper_case_name(),
             Node::Call {
+                control: _,
                 function: _,
                 dynamic_constants: _,
                 args: _,
@@ -1239,6 +1243,7 @@ impl Node {
                 op,
             } => op.lower_case_name(),
             Node::Call {
+                control: _,
                 function: _,
                 dynamic_constants: _,
                 args: _,
diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs
index 5a16aced..ccd4b13f 100644
--- a/hercules_ir/src/parse.rs
+++ b/hercules_ir/src/parse.rs
@@ -564,16 +564,18 @@ fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes
         parse_identifier,
     )(ir_text)?;
     let function = function_and_args.remove(0);
-    let args: Vec<NodeID> = function_and_args
+    let mut args: Vec<NodeID> = function_and_args
         .into_iter()
         .map(|x| context.borrow_mut().get_node_id(x))
         .collect();
+    let control = args.remove(0);
     let ir_text = nom::character::complete::multispace0(ir_text)?.0;
     let ir_text = nom::character::complete::char(')')(ir_text)?.0;
     let function = context.borrow_mut().get_function_id(function);
     Ok((
         ir_text,
         Node::Call {
+            control,
             function,
             dynamic_constants: dynamic_constants.into_boxed_slice(),
             args: args.into_boxed_slice(),
diff --git a/hercules_ir/src/schedule.rs b/hercules_ir/src/schedule.rs
index 7c9fdc10..2438a982 100644
--- a/hercules_ir/src/schedule.rs
+++ b/hercules_ir/src/schedule.rs
@@ -30,13 +30,16 @@ pub enum Schedule {
 
 /*
  * The authoritative enumeration of supported devices. Technically, a device
- * refers to a specific backend, so difference "devices" may refer to the same
+ * refers to a specific backend, so different "devices" may refer to the same
  * "kind" of hardware.
  */
 #[derive(Debug, Clone, Copy)]
 pub enum Device {
     CPU,
     GPU,
+    // Hercules function calls are placed in solitary partitions that are
+    // directly represented in the generated async Rust runtime code.
+    AsyncRust,
 }
 
 /*
diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs
index 64d46001..4475dbba 100644
--- a/hercules_ir/src/typecheck.rs
+++ b/hercules_ir/src/typecheck.rs
@@ -710,6 +710,7 @@ fn typeflow(
             Error(String::from("Unhandled ternary types."))
         }
         Node::Call {
+            control: _,
             function: callee_id,
             dynamic_constants: dc_args,
             args: _,
@@ -717,10 +718,10 @@ fn typeflow(
             let callee = &functions[callee_id.idx()];
 
             // Check number of run-time arguments.
-            if inputs.len() != callee.param_types.len() {
+            if inputs.len() - 1 != callee.param_types.len() {
                 return Error(format!(
                     "Call node has {} inputs, but calls a function with {} parameters.",
-                    inputs.len(),
+                    inputs.len() - 1,
                     callee.param_types.len(),
                 ));
             }
@@ -745,7 +746,7 @@ fn typeflow(
             }
 
             // Check argument types.
-            for (input, param_ty) in zip(inputs.iter(), callee.param_types.iter()) {
+            for (input, param_ty) in zip(inputs.iter().skip(1), callee.param_types.iter()) {
                 if let Concrete(input_id) = input {
                     if !types_match(types, dynamic_constants, dc_args, *param_ty, *input_id) {
                         return Error(String::from(
diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs
index c86920f9..83ee5a50 100644
--- a/hercules_ir/src/verify.rs
+++ b/hercules_ir/src/verify.rs
@@ -144,7 +144,7 @@ fn verify_structure(
                 }
             }
             // A region node must have exactly one control user. Additionally,
-            // it may have many phi users.
+            // it may have many phi users xor one call user.
             Node::Region { preds: _ } => {
                 let mut found_control = false;
                 for user in users {
@@ -153,6 +153,13 @@ fn verify_structure(
                             control: _,
                             data: _,
                         } => {}
+
+                        Node::Call {
+                            control: _,
+                            function: _,
+                            dynamic_constants: _,
+                            args: _,
+                        } => {}
                         _ => {
                             if function.nodes[user.idx()].is_control() {
                                 if found_control {
@@ -161,7 +168,7 @@ fn verify_structure(
                                     found_control = true;
                                 }
                             } else {
-                                Err("All region of a start node must be control or Phi nodes.")?;
+                                Err("All users of a region node must be control or Phi nodes.")?;
                             }
                         }
                     }
@@ -261,8 +268,7 @@ fn verify_structure(
             }
             // Phi nodes must depend on a region node.
             Node::Phi { control, data: _ } => {
-                if let Node::Region { preds: _ } = function.nodes[control.idx()] {
-                } else {
+                if !function.nodes[control.idx()].is_region() {
                     Err("Phi node's control input must be a region node.")?;
                 }
             }
@@ -280,6 +286,23 @@ fn verify_structure(
                     Err("ThreadID node's control input must be a fork node.")?;
                 }
             }
+            // Call nodes must depend on a region node with no other data users.
+            Node::Call {
+                control,
+                function: _,
+                dynamic_constants: _,
+                args: _,
+            } => {
+                if !function.nodes[control.idx()].is_region() {
+                    Err("Call node's control input must be a region node.")?;
+                }
+                let region_users = def_use.get_users(*control);
+                for user in region_users {
+                    if !function.nodes[user.idx()].is_control() && user.idx() != idx {
+                        Err("The region node used by a call node must have no other data users.")?;
+                    }
+                }
+            }
             // Collect nodes must depend on a join node.
             Node::Reduce {
                 control,
diff --git a/hercules_opt/src/ccp.rs b/hercules_opt/src/ccp.rs
index 5d13a6f4..57559079 100644
--- a/hercules_opt/src/ccp.rs
+++ b/hercules_opt/src/ccp.rs
@@ -340,7 +340,12 @@ pub fn collapse_region_chains(function: &mut Function, def_use: &ImmutableDefUse
     // over it.
     for id in (0..function.nodes.len()).map(NodeID::new) {
         if let Node::Region { preds } = &function.nodes[id.idx()] {
-            if preds.len() == 1 {
+            let has_call_user = def_use
+                .get_users(id)
+                .iter()
+                .any(|x| function.nodes[x.idx()].is_call());
+
+            if preds.len() == 1 && !has_call_user {
                 // Step 1: bridge gap between use and user.
                 let predecessor = preds[0];
                 let successor = def_use
@@ -733,6 +738,7 @@ fn ccp_flow_function(
         }
         // Call nodes are uninterpretable.
         Node::Call {
+            control: _,
             function: _,
             dynamic_constants: _,
             args,
diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs
index 0bd4397b..1b182c57 100644
--- a/hercules_opt/src/editor.rs
+++ b/hercules_opt/src/editor.rs
@@ -388,7 +388,7 @@ pub fn repair_plan(plan: &mut Plan, new_function: &Function, edits: &[Edit]) {
     let mut worklist = VecDeque::from(added_control_nodes);
     while let Some(control_id) = worklist.pop_front() {
         let node = &new_function.nodes[control_id.idx()];
-        // There are five cases where this control node needs to start a new
+        // There are a few cases where this control node needs to start a new
         // partition:
         // 1. It's a non-gravestone start node. This is any start node visited
         //    by the reverse postorder.
@@ -398,6 +398,8 @@ pub fn repair_plan(plan: &mut Plan, new_function: &Function, edits: &[Edit]) {
         // 5. It's a region node where not every predecessor is in the same
         //    partition (equivalently, not every predecessor is in the same
         //    partition - only region nodes can have multiple predecessors).
+        // 6. It's a region node with a call user.
+        // 7. Its predecessor is a region node with a call user.
         let top_level_fork = node.is_fork() && fork_join_nesting[&control_id].len() == 1;
         let top_level_join = control_subgraph.preds(control_id).any(|pred| {
             new_function.nodes[pred.idx()].is_join() && fork_join_nesting[&pred].len() == 1
@@ -408,12 +410,26 @@ pub fn repair_plan(plan: &mut Plan, new_function: &Function, edits: &[Edit]) {
             .preds(control_id)
             .map(|pred| new_partitions[pred.idx()])
             .all_equal();
+        let region_with_call_user = |id: NodeID| {
+            new_function.nodes[id.idx()].is_region()
+                && def_use
+                    .get_users(id)
+                    .as_ref()
+                    .into_iter()
+                    .any(|id| new_function.nodes[id.idx()].is_call())
+        };
+        let call_region = region_with_call_user(control_id);
+        let pred_is_call_region = control_subgraph
+            .preds(control_id)
+            .any(|pred| region_with_call_user(pred));
 
         if node.is_start()
             || node.is_return()
             || top_level_fork
             || top_level_join
             || multi_pred_region
+            || call_region
+            || pred_is_call_region
         {
             // This control node goes in a new partition.
             let part_id = PartitionID::new(plan.num_partitions);
@@ -442,9 +458,15 @@ pub fn repair_plan(plan: &mut Plan, new_function: &Function, edits: &[Edit]) {
         &antideps,
         &loops,
         &fork_join_map,
-        &new_partitions,
+        &mut new_partitions,
     );
-    for data_id in added_data_nodes {
+    let added_and_to_repartition_data_nodes: Vec<NodeID> = new_partitions
+        .iter()
+        .enumerate()
+        .filter(|(_, part)| part.is_none())
+        .map(|(idx, _)| NodeID::new(idx))
+        .collect();
+    for data_id in added_and_to_repartition_data_nodes {
         new_partitions[data_id.idx()] = new_partitions[bbs[data_id.idx()].idx()];
     }
 
@@ -452,6 +474,12 @@ pub fn repair_plan(plan: &mut Plan, new_function: &Function, edits: &[Edit]) {
     plan.partitions = new_partitions.into_iter().map(|id| id.unwrap()).collect();
     plan.partition_devices
         .resize(plan.num_partitions, Device::CPU);
+    // Place call partitions on the "AsyncRust" device.
+    for idx in 0..new_function.nodes.len() {
+        if new_function.nodes[idx].is_call() {
+            plan.partition_devices[plan.partitions[idx].idx()] = Device::AsyncRust;
+        }
+    }
 }
 
 /*
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index 5866c57c..f27f1f61 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -270,7 +270,7 @@ impl PassManager {
                             antideps,
                             loops,
                             fork_join_map,
-                            &vec![None; function.nodes.len()],
+                            &mut vec![None; function.nodes.len()],
                         )
                     },
                 )
@@ -564,7 +564,12 @@ impl PassManager {
                     for manifest in smodule.manifests.values() {
                         for partition_manifest in manifest.partitions.iter() {
                             let function = &smodule.functions[&partition_manifest.name];
-                            cpu_compile(function, partition_manifest, &mut llvm_ir).unwrap();
+                            match partition_manifest.device {
+                                DeviceManifest::CPU { parallel_launch: _ } => {
+                                    cpu_compile(function, partition_manifest, &mut llvm_ir).unwrap()
+                                }
+                                _ => todo!(),
+                            }
                         }
                     }
                     println!("{}", llvm_ir);
@@ -601,6 +606,7 @@ impl PassManager {
                     file.write_all(&hman_contents)
                         .expect("PANIC: Unable to write output manifest file contents.");
                     self.manifests = Some(smodule.manifests);
+                    println!("{:?}", self.manifests);
                 }
                 Pass::Serialize(output_file) => {
                     let module_contents: Vec<u8> = postcard::to_allocvec(&self.module).unwrap();
@@ -636,6 +642,7 @@ impl PassManager {
         self.doms = None;
         self.postdoms = None;
         self.fork_join_maps = None;
+        self.fork_join_nests = None;
         self.loops = None;
         self.antideps = None;
         self.bbs = None;
diff --git a/hercules_opt/src/phi_elim.rs b/hercules_opt/src/phi_elim.rs
index 21a1a2e5..8a47a12b 100644
--- a/hercules_opt/src/phi_elim.rs
+++ b/hercules_opt/src/phi_elim.rs
@@ -1,7 +1,10 @@
+extern crate bitvec;
 extern crate hercules_ir;
 
 use std::collections::HashMap;
 
+use self::bitvec::prelude::*;
+
 use self::hercules_ir::get_uses_mut;
 use self::hercules_ir::ir::*;
 
@@ -27,6 +30,26 @@ pub fn phi_elim(function: &mut Function) {
     // them with.
     let mut replace_nodes: HashMap<usize, NodeID> = HashMap::new();
 
+    // Determine region nodes that can't be removed, because they have a call
+    // user.
+    let mut has_call_user = bitvec![u8, Lsb0; 0; function.nodes.len()];
+    for idx in 0..function.nodes.len() {
+        if let Node::Call {
+            control,
+            function: _,
+            dynamic_constants: _,
+            args: _,
+        } = function.nodes[idx]
+        {
+            assert!(
+                !has_call_user[control.idx()],
+                "PANIC: Found region node with two call users ({:?}).",
+                control
+            );
+            has_call_user.set(control.idx(), true);
+        }
+    }
+
     // Iterate over the nodes of the function until convergence. In this loop,
     // we look for phis and regions that can be eliminated, mark them as
     // gravestones, and add them to the replacement map. For all other nodes, we
@@ -70,7 +93,7 @@ pub fn phi_elim(function: &mut Function) {
                     *node = Node::Start;
                 }
             } else if let Node::Region { preds } = node {
-                if preds.len() == 1 {
+                if preds.len() == 1 && !has_call_user[idx] {
                     changed = true;
                     replace_nodes.insert(idx, preds[0]);
                     // Delete this node.
diff --git a/hercules_opt/src/sroa.rs b/hercules_opt/src/sroa.rs
index c48b85ae..cb5ecd25 100644
--- a/hercules_opt/src/sroa.rs
+++ b/hercules_opt/src/sroa.rs
@@ -116,6 +116,7 @@ pub fn sroa(
                 }
             }
             Node::Call {
+                control: _,
                 function: _,
                 dynamic_constants: _,
                 args: _,
diff --git a/hercules_samples/call.hir b/hercules_samples/call.hir
index 44748934..5e884ecb 100644
--- a/hercules_samples/call.hir
+++ b/hercules_samples/call.hir
@@ -1,7 +1,8 @@
 fn myfunc(x: i32) -> i32
-  y = call(add, x, x)
-  r = return(start, y)
+  cr = region(start)
+  y = call(add, cr, x, x)
+  r = return(cr, y)
 
 fn add(x: i32, y: i32) -> i32
   w = add(x, y)
-  r = return(start, w)
\ No newline at end of file
+  r = return(start, w)
diff --git a/hercules_samples/fac/fac.hir b/hercules_samples/fac/fac.hir
index e43dd8ca..0d85c5d0 100644
--- a/hercules_samples/fac/fac.hir
+++ b/hercules_samples/fac/fac.hir
@@ -1,4 +1,4 @@
-fn fac(x: i32) -> i32
+fn fac_inner(x: i32) -> i32
   zero = constant(i32, 0)
   one = constant(i32, 1)
   loop = region(start, if_true)
@@ -11,3 +11,8 @@ fn fac(x: i32) -> i32
   if_false = projection(if, 0)
   if_true = projection(if, 1)
   r = return(if_false, fac_acc)
+
+fn fac(x: i32) -> i32
+  cr = region(start)
+  call = call(fac_inner, cr, x)
+  r = return(cr, call)
diff --git a/juno_frontend/examples/simple1.jn b/juno_frontend/examples/simple1.jn
index 355e8b8a..24088929 100644
--- a/juno_frontend/examples/simple1.jn
+++ b/juno_frontend/examples/simple1.jn
@@ -1,3 +1,7 @@
-fn simple1(x : i32, y : i32) -> i32 {
+fn simple1_inner(x : i32, y : i32) -> i32 {
   return x + y;
 }
+
+fn simple(x : i32, y : i32) -> i32 {
+  return simple1_inner(x, y);
+}
diff --git a/juno_frontend/src/codegen.rs b/juno_frontend/src/codegen.rs
index a2249812..ebc2bfb4 100644
--- a/juno_frontend/src/codegen.rs
+++ b/juno_frontend/src/codegen.rs
@@ -2,54 +2,59 @@ extern crate hercules_ir;
 
 use std::collections::{HashMap, VecDeque};
 
+use self::hercules_ir::build::*;
 use self::hercules_ir::ir;
 use self::hercules_ir::ir::*;
-use self::hercules_ir::build::*;
 
-use crate::ssa::SSA;
 use crate::semant;
-use crate::semant::{Prg, Function, Stmt, Expr, Literal, UnaryOp, BinaryOp};
-use crate::types::{TypeSolver, TypeSolverInst, Primitive, Either};
+use crate::semant::{BinaryOp, Expr, Function, Literal, Prg, Stmt, UnaryOp};
+use crate::ssa::SSA;
+use crate::types::{Either, Primitive, TypeSolver, TypeSolverInst};
 
 // Loop info is a stack of the loop levels, recording the latch and exit block of each
 type LoopInfo = Vec<(NodeID, NodeID)>;
 
-pub fn codegen_program(prg : Prg) -> Module {
+pub fn codegen_program(prg: Prg) -> Module {
     CodeGenerator::build(prg)
 }
 
 struct CodeGenerator<'a> {
-    builder   : Builder<'a>,
-    types     : &'a TypeSolver,
-    funcs     : &'a Vec<Function>,
-    uid       : usize,
+    builder: Builder<'a>,
+    types: &'a TypeSolver,
+    funcs: &'a Vec<Function>,
+    uid: usize,
     // The function map tracks a map from function index and set of type variables to its function
     // id in the builder
-    functions : HashMap<(usize, Vec<TypeID>), FunctionID>,
+    functions: HashMap<(usize, Vec<TypeID>), FunctionID>,
     // The worklist tracks a list of functions to codegen, tracking the function's id, its
     // type-solving instantiation (account for the type parameters), the function id, and the entry
     // block id
-    worklist  : VecDeque<(usize, TypeSolverInst<'a>, FunctionID, NodeID)>,
+    worklist: VecDeque<(usize, TypeSolverInst<'a>, FunctionID, NodeID)>,
 }
 
 impl CodeGenerator<'_> {
-    fn build((types, funcs) : Prg) -> Module {
+    fn build((types, funcs): Prg) -> Module {
         // Identify the functions (by index) which have no type arguments, these are the ones we
         // ask for code to be generated for
-        let func_idx
-            = funcs.iter().enumerate()
-                  .filter_map(|(i, f)|
-                                if f.num_type_args == 0 { Some(i) } else { None });
-
-        let mut codegen = CodeGenerator { builder   : Builder::create(),
-                                          types     : &types,
-                                          funcs     : &funcs,
-                                          uid       : 0,
-                                          functions : HashMap::new(),
-                                          worklist  : VecDeque::new(), };
+        let func_idx =
+            funcs
+                .iter()
+                .enumerate()
+                .filter_map(|(i, f)| if f.num_type_args == 0 { Some(i) } else { None });
+
+        let mut codegen = CodeGenerator {
+            builder: Builder::create(),
+            types: &types,
+            funcs: &funcs,
+            uid: 0,
+            functions: HashMap::new(),
+            worklist: VecDeque::new(),
+        };
 
         // Add the identifed functions to the list to code-gen
-        func_idx.for_each(|i| { let _ = codegen.get_function(i, vec![]); });
+        func_idx.for_each(|i| {
+            let _ = codegen.get_function(i, vec![]);
+        });
 
         codegen.finish()
     }
@@ -63,7 +68,7 @@ impl CodeGenerator<'_> {
         self.builder.finish()
     }
 
-    fn get_function(&mut self, func_idx : usize, ty_args : Vec<TypeID>) -> FunctionID {
+    fn get_function(&mut self, func_idx: usize, ty_args: Vec<TypeID>) -> FunctionID {
         let func_info = (func_idx, ty_args);
         match self.functions.get(&func_info) {
             Some(func_id) => *func_id,
@@ -85,20 +90,26 @@ impl CodeGenerator<'_> {
 
                 let return_type = solver_inst.lower_type(&mut self.builder, func.return_type);
 
-                let (func_id, entry)
-                    = self.builder.create_function(
-                        &name, param_types, return_type,
-                        func.num_dyn_consts as u32).unwrap();
+                let (func_id, entry) = self
+                    .builder
+                    .create_function(&name, param_types, return_type, func.num_dyn_consts as u32)
+                    .unwrap();
 
                 self.functions.insert((func_idx, ty_args), func_id);
-                self.worklist.push_back((func_idx, solver_inst, func_id, entry));
+                self.worklist
+                    .push_back((func_idx, solver_inst, func_id, entry));
                 func_id
-            },
+            }
         }
     }
 
-    fn codegen_function(&mut self, func : &Function, types : &mut TypeSolverInst,
-                        func_id : FunctionID, entry : NodeID) {
+    fn codegen_function(
+        &mut self,
+        func: &Function,
+        types: &mut TypeSolverInst,
+        func_id: FunctionID,
+        entry: NodeID,
+    ) {
         // Setup the SSA construction data structure
         let mut ssa = SSA::new(func_id, entry);
 
@@ -111,35 +122,40 @@ impl CodeGenerator<'_> {
         }
 
         // Generate code for the body
-        let None = self.codegen_stmt(&func.body, types, &mut ssa,
-                                     func_id, entry, &mut vec![])
-            else { panic!("Generated code for a function missing a return") };
+        let None = self.codegen_stmt(&func.body, types, &mut ssa, func_id, entry, &mut vec![])
+        else {
+            panic!("Generated code for a function missing a return")
+        };
     }
 
-    fn codegen_stmt(&mut self, stmt : &Stmt, types : &mut TypeSolverInst,
-                    ssa : &mut SSA, func_id : FunctionID, cur_block : NodeID,
-                    loops : &mut LoopInfo) -> Option<NodeID> {
+    fn codegen_stmt(
+        &mut self,
+        stmt: &Stmt,
+        types: &mut TypeSolverInst,
+        ssa: &mut SSA,
+        func_id: FunctionID,
+        cur_block: NodeID,
+        loops: &mut LoopInfo,
+    ) -> Option<NodeID> {
         match stmt {
             Stmt::AssignStmt { var, val } => {
                 let (val, block) = self.codegen_expr(val, types, ssa, func_id, cur_block);
                 ssa.write_variable(*var, block, val);
                 Some(block)
-            },
+            }
             Stmt::IfStmt { cond, thn, els } => {
-                let (val_cond, block_cond)
-                    = self.codegen_expr(cond, types, ssa, func_id, cur_block);
-                let (mut if_node, block_then, block_else)
-                    = ssa.create_cond(&mut self.builder, block_cond);
-
-                let then_end = self.codegen_stmt(thn, types, ssa,
-                                                 func_id, block_then, loops);
-                let else_end =
-                    match els {
-                        None => Some(block_else),
-                        Some(els_stmt) =>
-                            self.codegen_stmt(els_stmt, types, ssa,
-                                              func_id, block_else, loops),
-                    };
+                let (val_cond, block_cond) =
+                    self.codegen_expr(cond, types, ssa, func_id, cur_block);
+                let (mut if_node, block_then, block_else) =
+                    ssa.create_cond(&mut self.builder, block_cond);
+
+                let then_end = self.codegen_stmt(thn, types, ssa, func_id, block_then, loops);
+                let else_end = match els {
+                    None => Some(block_else),
+                    Some(els_stmt) => {
+                        self.codegen_stmt(els_stmt, types, ssa, func_id, block_else, loops)
+                    }
+                };
 
                 if_node.build_if(block_cond, val_cond);
                 let _ = self.builder.add_node(if_node);
@@ -153,16 +169,16 @@ impl CodeGenerator<'_> {
                         ssa.add_pred(block_join, else_term);
                         ssa.seal_block(block_join, &mut self.builder);
                         Some(block_join)
-                    },
+                    }
                 }
-            },
+            }
             Stmt::LoopStmt { cond, update, body } => {
                 // We generate guarded loops, so the first step is to create
                 // a conditional branch, branching on the condition
-                let (val_guard, block_guard)
-                    = self.codegen_expr(cond, types, ssa, func_id, cur_block);
-                let (mut if_node, true_guard, false_proj)
-                    = ssa.create_cond(&mut self.builder, block_guard);
+                let (val_guard, block_guard) =
+                    self.codegen_expr(cond, types, ssa, func_id, cur_block);
+                let (mut if_node, true_guard, false_proj) =
+                    ssa.create_cond(&mut self.builder, block_guard);
                 if_node.build_if(block_guard, val_guard);
                 let _ = self.builder.add_node(if_node);
 
@@ -175,18 +191,17 @@ impl CodeGenerator<'_> {
                 let block_latch = ssa.create_block(&mut self.builder);
 
                 // Code-gen any update into the latch and then code-gen the condition
-                let block_updated =
-                    match update {
-                        None => block_latch,
-                        Some(stmt) =>
-                            self.codegen_stmt(stmt, types, ssa, func_id, block_latch, loops)
-                                .expect("Loop update should return control"),
-                    };
-                let (val_cond, block_cond)
-                    = self.codegen_expr(cond, types, ssa, func_id, block_updated);
-
-                let (mut if_node, true_proj, false_proj)
-                    = ssa.create_cond(&mut self.builder, block_cond);
+                let block_updated = match update {
+                    None => block_latch,
+                    Some(stmt) => self
+                        .codegen_stmt(stmt, types, ssa, func_id, block_latch, loops)
+                        .expect("Loop update should return control"),
+                };
+                let (val_cond, block_cond) =
+                    self.codegen_expr(cond, types, ssa, func_id, block_updated);
+
+                let (mut if_node, true_proj, false_proj) =
+                    ssa.create_cond(&mut self.builder, block_cond);
                 if_node.build_if(block_cond, val_cond);
                 let _ = self.builder.add_node(if_node);
 
@@ -208,10 +223,10 @@ impl CodeGenerator<'_> {
                 // If the body of the loop can reach some block, we add that block as a predecessor
                 // of the latch
                 match body_res {
-                    None => {},
+                    None => {}
                     Some(block) => {
                         ssa.add_pred(block_latch, block);
-                    },
+                    }
                 }
 
                 // Seal remaining open blocks
@@ -221,56 +236,58 @@ impl CodeGenerator<'_> {
                 // It is always assumed a loop may be skipped and so control can reach after the
                 // loop
                 Some(block_exit)
-            },
+            }
             Stmt::ReturnStmt { expr } => {
-                let (val_ret, block_ret)
-                    = self.codegen_expr(expr, types, ssa, func_id, cur_block);
+                let (val_ret, block_ret) = self.codegen_expr(expr, types, ssa, func_id, cur_block);
                 let mut return_node = self.builder.allocate_node(func_id);
                 return_node.build_return(block_ret, val_ret);
                 let _ = self.builder.add_node(return_node);
                 None
-            },
+            }
             Stmt::BreakStmt {} => {
                 let last_loop = loops.len() - 1;
                 let (_latch, exit) = loops[last_loop];
                 ssa.add_pred(exit, cur_block); // The block that contains this break now leads to
                                                // the exit
                 None
-            },
+            }
             Stmt::ContinueStmt {} => {
                 let last_loop = loops.len() - 1;
                 let (latch, _exit) = loops[last_loop];
                 ssa.add_pred(latch, cur_block); // The block that contains this continue now leads
                                                 // to the latch
                 None
-            },
+            }
             Stmt::BlockStmt { body } => {
                 let mut block = Some(cur_block);
                 for stmt in body.iter() {
-                    block = self.codegen_stmt(stmt, types, ssa, func_id,
-                                              block.unwrap(), loops);
+                    block = self.codegen_stmt(stmt, types, ssa, func_id, block.unwrap(), loops);
                 }
                 block
-            },
+            }
             Stmt::ExprStmt { expr } => {
-                let (_val, block)
-                    = self.codegen_expr(expr, types, ssa, func_id, cur_block);
+                let (_val, block) = self.codegen_expr(expr, types, ssa, func_id, cur_block);
                 Some(block)
-            },
+            }
         }
     }
-    
+
     // The codegen_expr function returns a pair of node IDs, the first is the node whose value is
     // the given expression and the second is the node of a control node at which the value is
     // available
-    fn codegen_expr(&mut self, expr : &Expr, types : &mut TypeSolverInst,
-                    ssa : &mut SSA, func_id : FunctionID, cur_block : NodeID) 
-        -> (NodeID, NodeID) {
+    fn codegen_expr(
+        &mut self,
+        expr: &Expr,
+        types: &mut TypeSolverInst,
+        ssa: &mut SSA,
+        func_id: FunctionID,
+        cur_block: NodeID,
+    ) -> (NodeID, NodeID) {
         match expr {
-            Expr::Variable { var, .. } => {
-                (ssa.read_variable(*var, cur_block, &mut self.builder),
-                 cur_block)
-            },
+            Expr::Variable { var, .. } => (
+                ssa.read_variable(*var, cur_block, &mut self.builder),
+                cur_block,
+            ),
             Expr::DynConst { val, .. } => {
                 let mut node = self.builder.allocate_node(func_id);
                 let node_id = node.id();
@@ -278,63 +295,59 @@ impl CodeGenerator<'_> {
                 node.build_dynamicconstant(dyn_const);
                 let _ = self.builder.add_node(node);
                 (node_id, cur_block)
-            },
+            }
             Expr::Read { index, val, .. } => {
-                let (collection, block)
-                    = self.codegen_expr(val, types, ssa, func_id, cur_block);
-                let (indices, end_block) 
-                    = self.codegen_indices(index, types, ssa, func_id, block);
+                let (collection, block) = self.codegen_expr(val, types, ssa, func_id, cur_block);
+                let (indices, end_block) = self.codegen_indices(index, types, ssa, func_id, block);
 
                 let mut node = self.builder.allocate_node(func_id);
                 let node_id = node.id();
                 node.build_read(collection, indices.into());
                 let _ = self.builder.add_node(node);
                 (node_id, end_block)
-            },
-            Expr::Write { index, val, rep, .. } => {
-                let (collection, block)
-                    = self.codegen_expr(val, types, ssa, func_id, cur_block);
-                let (indices, idx_block) 
-                    = self.codegen_indices(index, types, ssa, func_id, block);
-                let (replace, end_block)
-                    = self.codegen_expr(rep, types, ssa, func_id, idx_block);
+            }
+            Expr::Write {
+                index, val, rep, ..
+            } => {
+                let (collection, block) = self.codegen_expr(val, types, ssa, func_id, cur_block);
+                let (indices, idx_block) = self.codegen_indices(index, types, ssa, func_id, block);
+                let (replace, end_block) = self.codegen_expr(rep, types, ssa, func_id, idx_block);
 
                 let mut node = self.builder.allocate_node(func_id);
                 let node_id = node.id();
                 node.build_write(collection, replace, indices.into());
                 let _ = self.builder.add_node(node);
                 (node_id, end_block)
-            },
+            }
             Expr::Tuple { vals, typ } => {
                 let mut block = cur_block;
                 let mut values = vec![];
                 for expr in vals {
-                    let (val_expr, block_expr)
-                        = self.codegen_expr(expr, types, ssa, func_id, block);
+                    let (val_expr, block_expr) =
+                        self.codegen_expr(expr, types, ssa, func_id, block);
                     block = block_expr;
                     values.push(val_expr);
                 }
-                
+
                 let tuple_type = types.lower_type(&mut self.builder, *typ);
                 (self.build_tuple(values, tuple_type, func_id), block)
-            },
+            }
             Expr::Union { tag, val, typ } => {
-                let (value, block)
-                    = self.codegen_expr(val, types, ssa, func_id, cur_block);
+                let (value, block) = self.codegen_expr(val, types, ssa, func_id, cur_block);
 
                 let union_type = types.lower_type(&mut self.builder, *typ);
                 (self.build_union(*tag, value, union_type, func_id), block)
-            },
+            }
             Expr::Constant { val, .. } => {
                 let const_id = self.build_constant(val, types);
-                
+
                 let mut val = self.builder.allocate_node(func_id);
                 let val_node = val.id();
                 val.build_constant(const_id);
                 let _ = self.builder.add_node(val);
 
                 (val_node, cur_block)
-            },
+            }
             Expr::Zero { typ } => {
                 let type_id = types.lower_type(&mut self.builder, *typ);
                 let zero_const = self.builder.create_constant_zero(type_id);
@@ -344,57 +357,58 @@ impl CodeGenerator<'_> {
                 let _ = self.builder.add_node(zero);
 
                 (zero_val, cur_block)
-            },
+            }
             Expr::UnaryExp { op, expr, .. } => {
-                let (val, block)
-                    = self.codegen_expr(expr, types, ssa, func_id, cur_block);
-                
+                let (val, block) = self.codegen_expr(expr, types, ssa, func_id, cur_block);
+
                 let mut expr = self.builder.allocate_node(func_id);
                 let expr_id = expr.id();
-                expr.build_unary(val,
-                                 match op {
-                                     UnaryOp::Negation => UnaryOperator::Neg,
-                                     UnaryOp::BitwiseNot => UnaryOperator::Not,
-                                 });
+                expr.build_unary(
+                    val,
+                    match op {
+                        UnaryOp::Negation => UnaryOperator::Neg,
+                        UnaryOp::BitwiseNot => UnaryOperator::Not,
+                    },
+                );
                 let _ = self.builder.add_node(expr);
 
                 (expr_id, block)
-            },
+            }
             Expr::BinaryExp { op, lhs, rhs, .. } => {
-                let (val_lhs, block_lhs)
-                    = self.codegen_expr(lhs, types, ssa, func_id, cur_block);
-                let (val_rhs, block_rhs)
-                    = self.codegen_expr(rhs, types, ssa, func_id, block_lhs);
+                let (val_lhs, block_lhs) = self.codegen_expr(lhs, types, ssa, func_id, cur_block);
+                let (val_rhs, block_rhs) = self.codegen_expr(rhs, types, ssa, func_id, block_lhs);
 
                 let mut expr = self.builder.allocate_node(func_id);
                 let expr_id = expr.id();
-                expr.build_binary(val_lhs, val_rhs,
-                                  match op {
-                                      BinaryOp::Add    => BinaryOperator::Add,
-                                      BinaryOp::Sub    => BinaryOperator::Sub,
-                                      BinaryOp::Mul    => BinaryOperator::Mul,
-                                      BinaryOp::Div    => BinaryOperator::Div,
-                                      BinaryOp::Mod    => BinaryOperator::Rem,
-                                      BinaryOp::BitAnd => BinaryOperator::And,
-                                      BinaryOp::BitOr  => BinaryOperator::Or,
-                                      BinaryOp::Xor    => BinaryOperator::Xor,
-                                      BinaryOp::Lt     => BinaryOperator::LT,
-                                      BinaryOp::Le     => BinaryOperator::LTE,
-                                      BinaryOp::Gt     => BinaryOperator::GT,
-                                      BinaryOp::Ge     => BinaryOperator::GTE,
-                                      BinaryOp::Eq     => BinaryOperator::EQ,
-                                      BinaryOp::Neq    => BinaryOperator::NE,
-                                      BinaryOp::LShift => BinaryOperator::LSh,
-                                      BinaryOp::RShift => BinaryOperator::RSh,
-                                  });
+                expr.build_binary(
+                    val_lhs,
+                    val_rhs,
+                    match op {
+                        BinaryOp::Add => BinaryOperator::Add,
+                        BinaryOp::Sub => BinaryOperator::Sub,
+                        BinaryOp::Mul => BinaryOperator::Mul,
+                        BinaryOp::Div => BinaryOperator::Div,
+                        BinaryOp::Mod => BinaryOperator::Rem,
+                        BinaryOp::BitAnd => BinaryOperator::And,
+                        BinaryOp::BitOr => BinaryOperator::Or,
+                        BinaryOp::Xor => BinaryOperator::Xor,
+                        BinaryOp::Lt => BinaryOperator::LT,
+                        BinaryOp::Le => BinaryOperator::LTE,
+                        BinaryOp::Gt => BinaryOperator::GT,
+                        BinaryOp::Ge => BinaryOperator::GTE,
+                        BinaryOp::Eq => BinaryOperator::EQ,
+                        BinaryOp::Neq => BinaryOperator::NE,
+                        BinaryOp::LShift => BinaryOperator::LSh,
+                        BinaryOp::RShift => BinaryOperator::RSh,
+                    },
+                );
                 let _ = self.builder.add_node(expr);
 
                 (expr_id, block_rhs)
-            },
+            }
             Expr::CastExpr { expr, typ } => {
                 let type_id = types.lower_type(&mut self.builder, *typ);
-                let (val, block)
-                    = self.codegen_expr(expr, types, ssa, func_id, cur_block);
+                let (val, block) = self.codegen_expr(expr, types, ssa, func_id, cur_block);
 
                 let mut expr = self.builder.allocate_node(func_id);
                 let expr_id = expr.id();
@@ -402,23 +416,23 @@ impl CodeGenerator<'_> {
                 let _ = self.builder.add_node(expr);
 
                 (expr_id, block)
-            },
+            }
             Expr::CondExpr { cond, thn, els, .. } => {
                 // Code-gen the condition
-                let (val_cond, block_cond)
-                    = self.codegen_expr(cond, types, ssa, func_id, cur_block);
+                let (val_cond, block_cond) =
+                    self.codegen_expr(cond, types, ssa, func_id, cur_block);
 
                 // Create the if
-                let (mut if_builder, then_block, else_block)
-                    = ssa.create_cond(&mut self.builder, block_cond);
+                let (mut if_builder, then_block, else_block) =
+                    ssa.create_cond(&mut self.builder, block_cond);
                 if_builder.build_if(block_cond, val_cond);
                 let _ = self.builder.add_node(if_builder);
 
                 // Code-gen the branches
-                let (then_val, block_then)
-                    = self.codegen_expr(thn, types, ssa, func_id, then_block);
-                let (else_val, block_else)
-                    = self.codegen_expr(els, types, ssa, func_id, else_block);
+                let (then_val, block_then) =
+                    self.codegen_expr(thn, types, ssa, func_id, then_block);
+                let (else_val, block_else) =
+                    self.codegen_expr(els, types, ssa, func_id, else_block);
 
                 // Create the join in the control-flow
                 let join = ssa.create_block(&mut self.builder);
@@ -433,8 +447,14 @@ impl CodeGenerator<'_> {
                 let _ = self.builder.add_node(phi);
 
                 (phi_id, join)
-            },
-            Expr::CallExpr { func, ty_args, dyn_consts, args, .. } => {
+            }
+            Expr::CallExpr {
+                func,
+                ty_args,
+                dyn_consts,
+                args,
+                ..
+            } => {
                 // We start by lowering the type arguments to TypeIDs
                 let mut type_params = vec![];
                 for typ in ty_args {
@@ -445,8 +465,8 @@ impl CodeGenerator<'_> {
                 let call_func = self.get_function(*func, type_params);
 
                 // We then build the dynamic constants
-                let dynamic_constants
-                    = TypeSolverInst::build_dyn_consts(&mut self.builder, dyn_consts);
+                let dynamic_constants =
+                    TypeSolverInst::build_dyn_consts(&mut self.builder, dyn_consts);
 
                 // Code gen for each argument in order
                 // For inouts, this becomes an ssa.read_variable
@@ -457,22 +477,34 @@ impl CodeGenerator<'_> {
                 for arg in args {
                     match arg {
                         Either::Left(exp) => {
-                            let (val, new_block)
-                                = self.codegen_expr(exp, types, ssa, func_id, block);
+                            let (val, new_block) =
+                                self.codegen_expr(exp, types, ssa, func_id, block);
                             block = new_block;
                             arg_vals.push(val);
-                        },
+                        }
                         Either::Right(var) => {
                             inouts.push(*var);
                             arg_vals.push(ssa.read_variable(*var, block, &mut self.builder));
-                        },
+                        }
                     }
                 }
 
-                // Create the call expression
+                // Create the call expression, a region specifically for it, and a region after that.
+                let mut call_region = self.builder.allocate_node(func_id);
+                let mut after_call_region = self.builder.allocate_node(func_id);
                 let mut call = self.builder.allocate_node(func_id);
                 let call_id = call.id();
-                call.build_call(call_func, dynamic_constants.into(), arg_vals.into());
+                call_region.build_region(Box::new([block]));
+                after_call_region.build_region(Box::new([call_region.id()]));
+                block = after_call_region.id();
+                call.build_call(
+                    call_region.id(),
+                    call_func,
+                    dynamic_constants.into(),
+                    arg_vals.into(),
+                );
+                let _ = self.builder.add_node(call_region);
+                let _ = self.builder.add_node(after_call_region);
                 let _ = self.builder.add_node(call);
 
                 // Read each of the "inout values" and perform the SSA update
@@ -486,7 +518,7 @@ impl CodeGenerator<'_> {
 
                     ssa.write_variable(var, block, read_id);
                 }
-                
+
                 // Read the "actual return" value and return it
                 let value_index = self.builder.create_field_index(0);
                 let mut read = self.builder.allocate_node(func_id);
@@ -495,14 +527,18 @@ impl CodeGenerator<'_> {
                 let _ = self.builder.add_node(read);
 
                 (read_id, block)
-            },
-            Expr::Intrinsic { id, ty_args : _, args, .. } => {
+            }
+            Expr::Intrinsic {
+                id,
+                ty_args: _,
+                args,
+                ..
+            } => {
                 // Code gen for each argument in order
                 let mut block = cur_block;
                 let mut arg_vals = vec![];
                 for arg in args {
-                    let (val, new_block)
-                        = self.codegen_expr(arg, types, ssa, func_id, block);
+                    let (val, new_block) = self.codegen_expr(arg, types, ssa, func_id, block);
                     block = new_block;
                     arg_vals.push(val);
                 }
@@ -514,45 +550,48 @@ impl CodeGenerator<'_> {
                 let _ = self.builder.add_node(call);
 
                 (call_id, block)
-            },
+            }
         }
     }
 
     // Convert a list of Index from the semantic analysis into a list of indices for the builder.
     // Note that this takes and returns a block since expressions may involve control flow
-    fn codegen_indices(&mut self, index : &Vec<semant::Index>, types : &mut TypeSolverInst,
-                       ssa : &mut SSA, func_id : FunctionID, cur_block : NodeID)
-        -> (Vec<ir::Index>, NodeID) {
-
+    fn codegen_indices(
+        &mut self,
+        index: &Vec<semant::Index>,
+        types: &mut TypeSolverInst,
+        ssa: &mut SSA,
+        func_id: FunctionID,
+        cur_block: NodeID,
+    ) -> (Vec<ir::Index>, NodeID) {
         let mut block = cur_block;
         let mut built_index = vec![];
         for idx in index {
             match idx {
                 semant::Index::Field(idx) => {
                     built_index.push(self.builder.create_field_index(*idx));
-                },
+                }
                 semant::Index::Variant(idx) => {
                     built_index.push(self.builder.create_variant_index(*idx));
-                },
+                }
                 semant::Index::Array(exps) => {
                     let mut expr_vals = vec![];
                     for exp in exps {
-                        let (val, new_block) =
-                            self.codegen_expr(exp, types, ssa, func_id, block);
+                        let (val, new_block) = self.codegen_expr(exp, types, ssa, func_id, block);
                         block = new_block;
                         expr_vals.push(val);
                     }
                     built_index.push(self.builder.create_position_index(expr_vals.into()));
-                },
+                }
             }
         }
 
         (built_index, block)
     }
 
-    fn build_tuple(&mut self, exprs : Vec<NodeID>, typ : TypeID, func_id : FunctionID) -> NodeID {
+    fn build_tuple(&mut self, exprs: Vec<NodeID>, typ: TypeID, func_id: FunctionID) -> NodeID {
         let zero_const = self.builder.create_constant_zero(typ);
-        
+
         let mut zero = self.builder.allocate_node(func_id);
         let zero_val = zero.id();
         zero.build_constant(zero_const);
@@ -571,11 +610,10 @@ impl CodeGenerator<'_> {
 
         val
     }
-    
-    fn build_union(&mut self, tag : usize, val : NodeID, typ : TypeID,
-                   func_id : FunctionID) -> NodeID {
+
+    fn build_union(&mut self, tag: usize, val: NodeID, typ: TypeID, func_id: FunctionID) -> NodeID {
         let zero_const = self.builder.create_constant_zero(typ);
-        
+
         let mut zero = self.builder.allocate_node(func_id);
         let zero_val = zero.id();
         zero.build_constant(zero_const);
@@ -591,23 +629,22 @@ impl CodeGenerator<'_> {
         write_id
     }
 
-    fn build_constant<'a>(&mut self, (lit, typ) : &semant::Constant,
-                          types : &mut TypeSolverInst<'a>) -> ConstantID {
+    fn build_constant<'a>(
+        &mut self,
+        (lit, typ): &semant::Constant,
+        types: &mut TypeSolverInst<'a>,
+    ) -> ConstantID {
         match lit {
-            Literal::Unit => {
-                self.builder.create_constant_prod(vec![].into())
-            },
-            Literal::Bool(val) => {
-                self.builder.create_constant_bool(*val)
-            },
+            Literal::Unit => self.builder.create_constant_prod(vec![].into()),
+            Literal::Bool(val) => self.builder.create_constant_bool(*val),
             Literal::Integer(val) => {
                 let p = types.as_numeric_type(&mut self.builder, *typ);
                 match p {
-                    Primitive::I8  => self.builder.create_constant_i8(*val as i8),
+                    Primitive::I8 => self.builder.create_constant_i8(*val as i8),
                     Primitive::I16 => self.builder.create_constant_i16(*val as i16),
                     Primitive::I32 => self.builder.create_constant_i32(*val as i32),
                     Primitive::I64 => self.builder.create_constant_i64(*val as i64),
-                    Primitive::U8  => self.builder.create_constant_u8(*val as u8),
+                    Primitive::U8 => self.builder.create_constant_u8(*val as u8),
                     Primitive::U16 => self.builder.create_constant_u16(*val as u16),
                     Primitive::U32 => self.builder.create_constant_u32(*val as u32),
                     Primitive::U64 => self.builder.create_constant_u64(*val as u64),
@@ -615,7 +652,7 @@ impl CodeGenerator<'_> {
                     Primitive::F64 => self.builder.create_constant_f64(*val as f64),
                     _ => panic!("Internal error in build_constant for integer"),
                 }
-            },
+            }
             Literal::Float(val) => {
                 let p = types.as_numeric_type(&mut self.builder, *typ);
                 match p {
@@ -623,19 +660,21 @@ impl CodeGenerator<'_> {
                     Primitive::F64 => self.builder.create_constant_f64(*val as f64),
                     _ => panic!("Internal error in build_constant for float"),
                 }
-            },
+            }
             Literal::Tuple(vals) => {
                 let mut constants = vec![];
                 for val in vals {
                     constants.push(self.build_constant(val, types));
                 }
                 self.builder.create_constant_prod(constants.into())
-            },
+            }
             Literal::Sum(tag, val) => {
                 let constant = self.build_constant(val, types);
                 let type_id = types.lower_type(&mut self.builder, *typ);
-                self.builder.create_constant_sum(type_id, *tag as u32, constant).unwrap()
-            },
+                self.builder
+                    .create_constant_sum(type_id, *tag as u32, constant)
+                    .unwrap()
+            }
         }
     }
 }
diff --git a/juno_frontend/src/main.rs b/juno_frontend/src/main.rs
index 2b0dc6b7..72acd88d 100644
--- a/juno_frontend/src/main.rs
+++ b/juno_frontend/src/main.rs
@@ -6,8 +6,8 @@ mod codegen;
 mod dynconst;
 mod env;
 mod intrinsics;
-mod parser;
 mod locs;
+mod parser;
 mod semant;
 mod ssa;
 mod types;
@@ -61,6 +61,9 @@ fn main() {
             if args.verify || args.verify_all {
                 pm.add_pass(hercules_opt::pass::Pass::Verify);
             }
+            if args.x_dot {
+                pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
+            }
             add_verified_pass!(pm, args, PhiElim);
             if args.x_dot {
                 pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
@@ -69,7 +72,7 @@ fn main() {
             add_pass!(pm, args, DCE);
             add_pass!(pm, args, GVN);
             add_pass!(pm, args, DCE);
-            add_pass!(pm, args, SROA);
+            //add_pass!(pm, args, SROA);
             if args.x_dot {
                 pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
             }
@@ -78,16 +81,20 @@ fn main() {
             add_verified_pass!(pm, args, DCE);
             if args.x_dot {
                 pm.add_pass(hercules_opt::pass::Pass::Xdot(true));
+                pm.add_pass(hercules_opt::pass::Pass::SchedXdot);
             }
-            
+
             let src_file_path = Path::new(&src_file);
             let module_name = String::from(src_file_path.file_stem().unwrap().to_str().unwrap());
             let output_folder = match args.output {
                 Some(output_folder) => output_folder,
-                None => String::from(src_file_path.parent().unwrap().to_str().unwrap())
+                None => String::from(src_file_path.parent().unwrap().to_str().unwrap()),
             };
-            pm.add_pass(hercules_opt::pass::Pass::Codegen(output_folder, module_name));
-            
+            pm.add_pass(hercules_opt::pass::Pass::Codegen(
+                output_folder,
+                module_name,
+            ));
+
             let _ = pm.run_passes();
         }
         Err(errs) => {
-- 
GitLab