From 2c1427c56db66622e47367bbbf223286d9a0adc5 Mon Sep 17 00:00:00 2001
From: rarbore2 <>
Date: Mon, 23 Dec 2024 23:59:46 -0600
Subject: [PATCH] Misc. fixes to make cava compile

 hercules_cg/src/              |  32 ++++-
 hercules_cg/src/           |  24 ++++
 hercules_cg/src/              |   2 +
 hercules_cg/src/               | 183 +++++++++++++++++-----------
 hercules_ir/src/               |  25 +++-
 hercules_ir/src/        |   8 +-
 hercules_opt/src/             | 109 ++++++++++++++++-
 hercules_opt/src/          |   5 +-
 hercules_opt/src/            |  36 +++---
 hercules_samples/call/src/   |   2 +-
 hercules_samples/ccp/src/    |   2 +-
 juno_frontend/src/     |  10 ++
 juno_frontend/src/            |   4 +-
 juno_samples/cava.jn                |  14 +--
 juno_samples/nested_ccp/src/ |   2 +-
 15 files changed, 340 insertions(+), 118 deletions(-)
 create mode 100644 hercules_cg/src/

diff --git a/hercules_cg/src/ b/hercules_cg/src/
index eba161bb..080c195a 100644
--- a/hercules_cg/src/
+++ b/hercules_cg/src/
@@ -114,8 +114,8 @@ impl<'a> CPUContext<'a> {
             .map(|idx| (NodeID::new(idx), LLVMBlock::default()))
-        // Emit calculation of dynamic constants into the start block. Just
-        // calculate every dynamic constant, and let LLVM clean them up.
+        // Emit calculation of dynamic constants into the start block. Calculate
+        // every valid dynamic constant, and let LLVM clean them up.
@@ -212,7 +212,11 @@ impl<'a> CPUContext<'a> {
                 let term = &mut blocks.get_mut(&id).unwrap().term;
                 write!(term, "  ret {}\n", self.get_value(data, true))?
-            _ => panic!("PANIC: Can't lower {:?}.", self.function.nodes[id.idx()]),
+            _ => panic!(
+                "PANIC: Can't lower {:?} in {}.",
+                self.function.nodes[id.idx()],
+            ),
@@ -866,6 +870,28 @@ fn convert_intrinsic(intrinsic: &Intrinsic, ty: &Type) -> String {
         Intrinsic::Ln => "log",
         Intrinsic::Log10 => "log10",
         Intrinsic::Log2 => "log2",
+        Intrinsic::Max => {
+            if ty.is_float() {
+                "max"
+            } else if ty.is_unsigned() {
+                "umax"
+            } else if ty.is_signed() {
+                "smax"
+            } else {
+                panic!()
+            }
+        }
+        Intrinsic::Min => {
+            if ty.is_float() {
+                "min"
+            } else if ty.is_unsigned() {
+                "umin"
+            } else if ty.is_signed() {
+                "smin"
+            } else {
+                panic!()
+            }
+        }
         Intrinsic::Pow => "pow",
         Intrinsic::Powf => "pow",
         Intrinsic::Powi => "powi",
diff --git a/hercules_cg/src/ b/hercules_cg/src/
new file mode 100644
index 00000000..7dbeeeda
--- /dev/null
+++ b/hercules_cg/src/
@@ -0,0 +1,24 @@
+extern crate hercules_ir;
+use self::hercules_ir::*;
+ * Top level function to definitively place functions onto devices. A function
+ * may store a device placement, but only optionally - this function assigns
+ * devices to the rest of the functions.
+ */
+pub fn device_placement(functions: &Vec<Function>, callgraph: &CallGraph) -> Vec<Device> {
+    let mut devices = vec![];
+    for (idx, function) in functions.into_iter().enumerate() {
+        if let Some(device) = function.device {
+            devices.push(device);
+        } else if function.entry || callgraph.num_callees(FunctionID::new(idx)) != 0 {
+            devices.push(Device::AsyncRust);
+        } else {
+            devices.push(Device::LLVM);
+        }
+    }
+    devices
diff --git a/hercules_cg/src/ b/hercules_cg/src/
index 9013eff7..952ce368 100644
--- a/hercules_cg/src/
+++ b/hercules_cg/src/
@@ -1,9 +1,11 @@
 #![feature(if_let_guard, let_chains)]
 pub mod cpu;
+pub mod device;
 pub mod mem;
 pub mod rt;
 pub use crate::cpu::*;
+pub use crate::device::*;
 pub use crate::mem::*;
 pub use crate::rt::*;
diff --git a/hercules_cg/src/ b/hercules_cg/src/
index ddbc8f53..305ecf9b 100644
--- a/hercules_cg/src/
+++ b/hercules_cg/src/
@@ -24,6 +24,7 @@ pub fn rt_codegen<W: Write>(
     control_subgraph: &Subgraph,
     bbs: &Vec<NodeID>,
     callgraph: &CallGraph,
+    devices: &Vec<Device>,
     memory_objects: &Vec<MemoryObjects>,
     memory_objects_mutability: &MemoryObjectsMutability,
     w: &mut W,
@@ -36,6 +37,7 @@ pub fn rt_codegen<W: Write>(
+        devices,
         _memory_objects_mutability: memory_objects_mutability,
@@ -50,6 +52,7 @@ struct RTContext<'a> {
     control_subgraph: &'a Subgraph,
     bbs: &'a Vec<NodeID>,
     callgraph: &'a CallGraph,
+    devices: &'a Vec<Device>,
     memory_objects: &'a Vec<MemoryObjects>,
     // TODO: use once memory objects are passed in a custom type where this
     // actually matters.
@@ -157,6 +160,9 @@ impl<'a> RTContext<'a> {
         // Dump signatures for called CPU functions.
         write!(w, "    extern \"C\" {{\n")?;
         for callee in self.callgraph.get_callees(self.func_id) {
+            if self.devices[callee.idx()] != Device::LLVM {
+                continue;
+            }
             let callee = &self.module.functions[callee.idx()];
             write!(w, "        fn {}(",;
             let mut first_param = true;
@@ -376,86 +382,119 @@ impl<'a> RTContext<'a> {
                 ref dynamic_constants,
                 ref args,
             } => {
-                let block = &mut blocks.get_mut(&self.bbs[id.idx()]).unwrap();
-                write!(
-                    block,
-                    "                {} = unsafe {{ {}(",
-                    self.get_value(id),
-                    self.module.functions[callee_id.idx()].name
-                )?;
-                for dc in dynamic_constants {
-                    self.codegen_dynamic_constant(*dc, block)?;
-                    write!(block, ", ")?;
-                }
-                for arg in args {
-                    write!(block, "{}, ", self.get_value(*arg))?;
-                }
-                write!(block, ") }};\n")?;
-                // When a CPU function is called that returns a memory object,
-                // that memory object must have come from one of its parameters.
-                // Dynamically figure out which one it came from, so that we can
-                // move it to the slot of the output memory object.
-                let call_memory_objects =
-                    self.memory_objects[self.func_id.idx()].memory_objects(id);
-                if !call_memory_objects.is_empty() {
-                    assert_eq!(call_memory_objects.len(), 1);
-                    let call_memory_object = call_memory_objects[0];
-                    let callee_returned_memory_objects =
-                        self.memory_objects[callee_id.idx()].returned_memory_objects();
-                    let possible_params: Vec<_> = (0..self.module.functions[callee_id.idx()]
-                        .param_types
-                        .len())
-                        .filter(|idx| {
-                            let memory_object_of_param = self.memory_objects[callee_id.idx()]
-                                .memory_object_of_parameter(*idx);
-                            // Look at parameters that could be the source of
-                            // the memory object returned by the function.
-                            memory_object_of_param
-                                .map(|memory_object_of_param| {
-                                    callee_returned_memory_objects.contains(&memory_object_of_param)
-                                })
-                                .unwrap_or(false)
-                        })
-                        .collect();
-                    let arg_memory_objects = args
-                        .into_iter()
-                        .enumerate()
-                        .filter(|(idx, _)| possible_params.contains(idx))
-                        .map(|(_, arg)| {
-                            self.memory_objects[self.func_id.idx()]
-                                .memory_objects(*arg)
+                match self.devices[callee_id.idx()] {
+                    Device::LLVM => {
+                        let block = &mut blocks.get_mut(&self.bbs[id.idx()]).unwrap();
+                        write!(
+                            block,
+                            "                {} = unsafe {{ {}(",
+                            self.get_value(id),
+                            self.module.functions[callee_id.idx()].name
+                        )?;
+                        for dc in dynamic_constants {
+                            self.codegen_dynamic_constant(*dc, block)?;
+                            write!(block, ", ")?;
+                        }
+                        for arg in args {
+                            write!(block, "{}, ", self.get_value(*arg))?;
+                        }
+                        write!(block, ") }};\n")?;
+                        // When a CPU function is called that returns a memory
+                        // object, that memory object must have come from one of
+                        // its parameters. Dynamically figure out which one it
+                        // came from, so that we can move it to the slot of the
+                        // output memory object.
+                        let call_memory_objects =
+                            self.memory_objects[self.func_id.idx()].memory_objects(id);
+                        if !call_memory_objects.is_empty() {
+                            assert_eq!(call_memory_objects.len(), 1);
+                            let call_memory_object = call_memory_objects[0];
+                            let callee_returned_memory_objects =
+                                self.memory_objects[callee_id.idx()].returned_memory_objects();
+                            let possible_params: Vec<_> =
+                                (0..self.module.functions[callee_id.idx()].param_types.len())
+                                    .filter(|idx| {
+                                        let memory_object_of_param = self.memory_objects
+                                            [callee_id.idx()]
+                                        .memory_object_of_parameter(*idx);
+                                        // Look at parameters that could be the
+                                        // source of the memory object returned
+                                        // by the function.
+                                        memory_object_of_param
+                                            .map(|memory_object_of_param| {
+                                                callee_returned_memory_objects
+                                                    .contains(&memory_object_of_param)
+                                            })
+                                            .unwrap_or(false)
+                                    })
+                                    .collect();
+                            let arg_memory_objects = args
-                        })
-                        .flatten();
-                    // Dynamically check which of the memory objects
-                    // corresponding to arguments to the call was returned by
-                    // the call. Move that memory object into the memory object
-                    // of the call.
-                    let mut first_obj = true;
-                    for arg_memory_object in arg_memory_objects {
-                        write!(block, "                ")?;
-                        if first_obj {
-                            first_obj = false;
-                        } else {
-                            write!(block, "else ")?;
+                                .enumerate()
+                                .filter(|(idx, _)| possible_params.contains(idx))
+                                .map(|(_, arg)| {
+                                    self.memory_objects[self.func_id.idx()]
+                                        .memory_objects(*arg)
+                                        .into_iter()
+                                })
+                                .flatten();
+                            // Dynamically check which of the memory objects
+                            // corresponding to arguments to the call was
+                            // returned by the call. Move that memory object
+                            // into the memory object of the call.
+                            let mut first_obj = true;
+                            for arg_memory_object in arg_memory_objects {
+                                write!(block, "                ")?;
+                                if first_obj {
+                                    first_obj = false;
+                                } else {
+                                    write!(block, "else ")?;
+                                }
+                                write!(block, "if let Some(mem_obj) = mem_obj{}.as_mut() && ::std::boxed::Box::as_mut_ptr(mem_obj) as *mut u8 == {} {{\n", arg_memory_object, self.get_value(id))?;
+                                write!(
+                                    block,
+                                    "                    mem_obj{} = mem_obj{}.take();\n",
+                                    call_memory_object, arg_memory_object
+                                )?;
+                                write!(block, "                }}\n")?;
+                            }
+                            write!(block, "                else {{\n")?;
+                            write!(block, "                    panic!(\"HERCULES PANIC: Pointer returned from called function doesn't match any known memory objects.\");\n")?;
+                            write!(block, "                }}\n")?;
-                        write!(block, "if let Some(mem_obj) = mem_obj{}.as_mut() && ::std::boxed::Box::as_mut_ptr(mem_obj) as *mut u8 == {} {{\n", arg_memory_object, self.get_value(id))?;
+                    }
+                    Device::AsyncRust => {
+                        let block = &mut blocks.get_mut(&self.bbs[id.idx()]).unwrap();
-                            "                    mem_obj{} = mem_obj{}.take();\n",
-                            call_memory_object, arg_memory_object
+                            "                {} = {}(",
+                            self.get_value(id),
+                            self.module.functions[callee_id.idx()].name
-                        write!(block, "                }}\n")?;
+                        for dc in dynamic_constants {
+                            self.codegen_dynamic_constant(*dc, block)?;
+                            write!(block, ", ")?;
+                        }
+                        for arg in args {
+                            if self.module.types[self.typing[arg.idx()].idx()].is_primitive() {
+                                write!(block, "{}, ", self.get_value(*arg))?;
+                            } else {
+                                write!(block, "{}.take(), ", self.get_value(*arg))?;
+                            }
+                        }
+                        write!(block, ").await;\n")?;
-                    write!(block, "                else {{\n")?;
-                    write!(block, "                    panic!(\"HERCULES PANIC: Pointer returned from called function doesn't match any known memory objects.\");\n")?;
-                    write!(block, "                }}\n")?;
+                    _ => todo!(),
-            _ => panic!("PANIC: Can't lower {:?}.", func.nodes[id.idx()]),
+            _ => panic!(
+                "PANIC: Can't lower {:?} in {}.",
+                func.nodes[id.idx()],
+            ),
diff --git a/hercules_ir/src/ b/hercules_ir/src/
index d4eed8e2..b46e2dda 100644
--- a/hercules_ir/src/
+++ b/hercules_ir/src/
@@ -289,6 +289,8 @@ pub enum Intrinsic {
+    Max,
+    Min,
@@ -327,13 +329,12 @@ pub enum Schedule {
  * The authoritative enumeration of supported backends. Multiple backends may
  * correspond to the same kind of hardware.
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub enum Device {
-    // Internal nodes in the call graph are lowered to async Rust code that
-    // calls device functions (leaf nodes in the call graph), possibly
-    // concurrently.
+    // Entry functions are lowered to async Rust code that calls device
+    // functions (leaf nodes in the call graph), possibly concurrently.
@@ -594,7 +595,10 @@ pub fn constants_bottom_up(constants: &Vec<Constant>) -> impl Iterator<Item = Co
  * Create an iterator that traverses all the dynamic constants in the module
  * bottom up. This uses a coroutine to make iteratively traversing the dynamic
- * constant DAGs easier.
+ * constant DAGs easier. This bottom-up visitor will ignore dynamic constants
+ * that reference nonsensical IDs. These are artifacts left over by the process
+ * of substituting dynamic constants during some transformation passes, and
+ * shouldn't be used by any nodes.
 pub fn dynamic_constants_bottom_up(
     dynamic_constants: &Vec<DynamicConstant>,
@@ -619,6 +623,11 @@ pub fn dynamic_constants_bottom_up(
                     // We have to yield the children of this node before
                     // this node itself. We keep track of which nodes have
                     // yielded using visited.
+                    if left.idx() >= visited.len() || right.idx() >= visited.len() {
+                        // This is an invalid dynamic constant and should be
+                        // skipped.
+                        continue;
+                    }
                     let can_yield = visited[left.idx()] && visited[right.idx()];
                     if can_yield {
                         visited.set(id.idx(), true);
@@ -1565,6 +1574,8 @@ impl Intrinsic {
             "log" => Some(Intrinsic::Log),
             "log10" => Some(Intrinsic::Log10),
             "log2" => Some(Intrinsic::Log2),
+            "max" => Some(Intrinsic::Max),
+            "min" => Some(Intrinsic::Min),
             "pow" => Some(Intrinsic::Pow),
             "powf" => Some(Intrinsic::Powf),
             "powi" => Some(Intrinsic::Powi),
@@ -1601,6 +1612,8 @@ impl Intrinsic {
             Intrinsic::Log => "Log",
             Intrinsic::Log10 => "Log10",
             Intrinsic::Log2 => "Log2",
+            Intrinsic::Max => "Max",
+            Intrinsic::Min => "Min",
             Intrinsic::Pow => "Pow",
             Intrinsic::Powf => "Powf",
             Intrinsic::Powi => "Powi",
@@ -1636,6 +1649,8 @@ impl Intrinsic {
             Intrinsic::Log => "log",
             Intrinsic::Log10 => "log10",
             Intrinsic::Log2 => "log2",
+            Intrinsic::Max => "max",
+            Intrinsic::Min => "min",
             Intrinsic::Pow => "pow",
             Intrinsic::Powf => "powf",
             Intrinsic::Powi => "powi",
diff --git a/hercules_ir/src/ b/hercules_ir/src/
index 46a0a9f2..c657d598 100644
--- a/hercules_ir/src/
+++ b/hercules_ir/src/
@@ -790,6 +790,8 @@ fn typeflow(
                 | Intrinsic::Tanh => 1,
                 | Intrinsic::Log
+                | Intrinsic::Max
+                | Intrinsic::Min
                 | Intrinsic::Pow
                 | Intrinsic::Powf
                 | Intrinsic::Powi => 2,
@@ -844,6 +846,8 @@ fn typeflow(
                 | Intrinsic::Ln1P
                 | Intrinsic::Log10
                 | Intrinsic::Log2
+                | Intrinsic::Max
+                | Intrinsic::Min
                 | Intrinsic::Round
                 | Intrinsic::Sin
                 | Intrinsic::Sinh
@@ -1062,9 +1066,7 @@ fn typeflow(
             // Type is the type of the _if node
-        Node::Undef { ty } => {
-            TypeSemilattice::Concrete(*ty)
-        }
+        Node::Undef { ty } => TypeSemilattice::Concrete(*ty),
diff --git a/hercules_opt/src/ b/hercules_opt/src/
index 28215366..a66bf633 100644
--- a/hercules_opt/src/
+++ b/hercules_opt/src/
@@ -1,11 +1,12 @@
 extern crate hercules_ir;
+use std::cmp::{max, min};
 use std::collections::HashSet;
 use std::iter::zip;
 use self::hercules_ir::dataflow::*;
-use self::hercules_ir::ir::*;
 use self::hercules_ir::def_use::get_uses;
+use self::hercules_ir::ir::*;
 use crate::*;
@@ -833,6 +834,112 @@ fn ccp_flow_function(
                     Intrinsic::Log => binary_float_intrinsic!(intrinsic, constants, log),
                     Intrinsic::Log10 => unary_float_intrinsic!(intrinsic, constants, log10),
                     Intrinsic::Log2 => unary_float_intrinsic!(intrinsic, constants, log2),
+                    Intrinsic::Max => {
+                        if let (Constant::Integer8(i), Constant::Integer8(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer8(max(*i, *j)))
+                        } else if let (Constant::Integer16(i), Constant::Integer16(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer16(max(*i, *j)))
+                        } else if let (Constant::Integer32(i), Constant::Integer32(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer32(max(*i, *j)))
+                        } else if let (Constant::Integer64(i), Constant::Integer64(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer64(max(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger8(i),
+                            Constant::UnsignedInteger8(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger8(max(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger16(i),
+                            Constant::UnsignedInteger16(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger16(max(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger32(i),
+                            Constant::UnsignedInteger32(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger32(max(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger64(i),
+                            Constant::UnsignedInteger64(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger64(max(*i, *j)))
+                        } else if let (Constant::Float32(i), Constant::Float32(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Float32(*i.max(j)))
+                        } else if let (Constant::Float64(i), Constant::Float64(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Float64(*i.max(j)))
+                        } else {
+                            panic!("Unsupported combination of intrinsic max and constant value. Did typechecking succeed?")
+                        }
+                    }
+                    Intrinsic::Min => {
+                        if let (Constant::Integer8(i), Constant::Integer8(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer8(min(*i, *j)))
+                        } else if let (Constant::Integer16(i), Constant::Integer16(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer16(min(*i, *j)))
+                        } else if let (Constant::Integer32(i), Constant::Integer32(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer32(min(*i, *j)))
+                        } else if let (Constant::Integer64(i), Constant::Integer64(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Integer64(min(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger8(i),
+                            Constant::UnsignedInteger8(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger8(min(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger16(i),
+                            Constant::UnsignedInteger16(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger16(min(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger32(i),
+                            Constant::UnsignedInteger32(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger32(min(*i, *j)))
+                        } else if let (
+                            Constant::UnsignedInteger64(i),
+                            Constant::UnsignedInteger64(j),
+                        ) = (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::UnsignedInteger64(min(*i, *j)))
+                        } else if let (Constant::Float32(i), Constant::Float32(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Float32(*i.min(j)))
+                        } else if let (Constant::Float64(i), Constant::Float64(j)) =
+                            (constants[0], constants[1])
+                        {
+                            ConstantLattice::Constant(Constant::Float64(*i.min(j)))
+                        } else {
+                            panic!("Unsupported combination of intrinsic min and constant value. Did typechecking succeed?")
+                        }
+                    }
                     Intrinsic::Pow => {
                         if let Constant::UnsignedInteger32(p) = constants[1] {
                             if let Constant::Integer8(i) = constants[0] {
diff --git a/hercules_opt/src/ b/hercules_opt/src/
index 1c485ecc..9ab15874 100644
--- a/hercules_opt/src/
+++ b/hercules_opt/src/
@@ -45,6 +45,9 @@ pub fn inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
     // Step 4: run inlining on each function individually. Iterate the functions
     // in topological order.
     for to_inline_id in topo {
+        if editors[to_inline_id.idx()].func().entry {
+            continue;
+        }
         // Since Rust cannot analyze the accesses into an array of mutable
         // references, we need to do some weirdness here to simultaneously get:
         // 1. A mutable reference to the function we're modifying.
@@ -181,7 +184,7 @@ fn inline_func(
                     // as the new references we just made in the first step. We
                     // actually want to institute all the updates
                     // *simultaneously*, hence the two step maneuver.
-                    let first_dc = edit.num_dynamic_constants() + 1;
+                    let first_dc = edit.num_dynamic_constants() + 100;
                     for (dc_a, dc_n) in zip(dcs_a, first_dc..) {
diff --git a/hercules_opt/src/ b/hercules_opt/src/
index 58251c42..24ed0e4e 100644
--- a/hercules_opt/src/
+++ b/hercules_opt/src/
@@ -864,11 +864,25 @@ impl PassManager {
                     let memory_objects_mutable =
                         memory_objects_mutability(&self.module, &callgraph, &memory_objects);
+                    let devices = device_placement(&self.module.functions, &callgraph);
                     let mut rust_rt = String::new();
                     let mut llvm_ir = String::new();
                     for idx in 0..self.module.functions.len() {
-                        if self.module.functions[idx].entry {
-                            rt_codegen(
+                        match devices[idx] {
+                            Device::LLVM => cpu_codegen(
+                                &self.module.functions[idx],
+                                &self.module.types,
+                                &self.module.constants,
+                                &self.module.dynamic_constants,
+                                &reverse_postorders[idx],
+                                &typing[idx],
+                                &control_subgraphs[idx],
+                                &bbs[idx],
+                                &mut llvm_ir,
+                            )
+                            .unwrap(),
+                            Device::AsyncRust => rt_codegen(
@@ -876,25 +890,13 @@ impl PassManager {
+                                &devices,
                                 &mut rust_rt,
-                            .unwrap();
-                        } else {
-                            // TODO: determine which backend to use for function.
-                            cpu_codegen(
-                                &self.module.functions[idx],
-                                &self.module.types,
-                                &self.module.constants,
-                                &self.module.dynamic_constants,
-                                &reverse_postorders[idx],
-                                &typing[idx],
-                                &control_subgraphs[idx],
-                                &bbs[idx],
-                                &mut llvm_ir,
-                            )
-                            .unwrap();
+                            .unwrap(),
+                            _ => todo!(),
                     println!("{}", llvm_ir);
diff --git a/hercules_samples/call/src/ b/hercules_samples/call/src/
index 3bbb634c..b5c999fd 100644
--- a/hercules_samples/call/src/
+++ b/hercules_samples/call/src/
@@ -14,6 +14,6 @@ fn main() {
-fn dot_test() {
+fn call_test() {
diff --git a/hercules_samples/ccp/src/ b/hercules_samples/ccp/src/
index 5fc78ab5..9e2aced9 100644
--- a/hercules_samples/ccp/src/
+++ b/hercules_samples/ccp/src/
@@ -13,6 +13,6 @@ fn main() {
-fn dot_test() {
+fn ccp_test() {
diff --git a/juno_frontend/src/ b/juno_frontend/src/
index e0c2d2c3..e307f7f0 100644
--- a/juno_frontend/src/
+++ b/juno_frontend/src/
@@ -148,6 +148,16 @@ static INTRINSICS: phf::Map<&'static str, IntrinsicInfo> = phf_map! {
         kinds   : &[parser::Kind::Float],
         typ     : var_type,
+    "max"   => IntrinsicInfo {
+        id      : hercules_ir::ir::Intrinsic::Max,
+        kinds   : &[parser::Kind::Number],
+        typ     : var2_type,
+    },
+    "min"   => IntrinsicInfo {
+        id      : hercules_ir::ir::Intrinsic::Min,
+        kinds   : &[parser::Kind::Number],
+        typ     : var2_type,
+    },
     "pow" => IntrinsicInfo {
         id      : hercules_ir::ir::Intrinsic::Pow,
         kinds   : &[parser::Kind::Integer],
diff --git a/juno_frontend/src/ b/juno_frontend/src/
index 4ff5d9fc..b18b2979 100644
--- a/juno_frontend/src/
+++ b/juno_frontend/src/
@@ -184,8 +184,8 @@ pub fn compile_ir(
     if x_dot {
-    add_pass!(pm, verify, Forkify);
-    add_pass!(pm, verify, ForkGuardElim);
+    //add_pass!(pm, verify, Forkify);
+    //add_pass!(pm, verify, ForkGuardElim);
     add_verified_pass!(pm, verify, DCE);
     add_pass!(pm, verify, Outline);
     add_pass!(pm, verify, InterproceduralSROA);
diff --git a/juno_samples/cava.jn b/juno_samples/cava.jn
index 977e2e02..9d398db1 100644
--- a/juno_samples/cava.jn
+++ b/juno_samples/cava.jn
@@ -1,11 +1,3 @@
-fn max<a : number>(x : a, y : a) -> a {
-  return if x >= y then x else y;
-fn min<a : number>(x : a, y : a) -> a {
-  return if x < y then x else y;
 fn medianMatrix<a : number, rows, cols : usize>(m : a[rows, cols]) -> a {
   const n : usize = rows * cols;
@@ -52,7 +44,7 @@ fn descale<row : usize, col : usize>(input : f32[CHAN, row, col]) -> u8[CHAN, ro
   for chan = 0 to CHAN {
     for r = 0 to row {
       for c = 0 to col {
-        res[chan, r, c] = min::<f32>(max::<f32>(input[chan, r, c] * 255, 0), 255) as u8;
+        res[chan, r, c] = min!::<f32>(max!::<f32>(input[chan, r, c] * 255, 0), 255) as u8;
@@ -144,7 +136,7 @@ fn transform<row : usize, col : usize>
   for chan = 0 to CHAN {
     for r = 0 to row {
       for c = 0 to col {
-        result[chan, r, c] = max::<f32>(
+        result[chan, r, c] = max!::<f32>(
                                 input[0, r, c] * tstw_trans[0, chan]
                                 + input[1, r, c] * tstw_trans[1, chan]
                                 + input[2, r, c] * tstw_trans[2, chan]
@@ -181,7 +173,7 @@ fn gamut<row : usize, col : usize>
         chan_val += coefs[0, chan] + coefs[1, chan] * input[0, r, c]
                                    + coefs[2, chan] * input[1, r, c]
                                    + coefs[3, chan] * input[2, r, c];
-        result[chan, row, col] = max::<f32>(chan_val, 0);
+        result[chan, row, col] = max!::<f32>(chan_val, 0);
diff --git a/juno_samples/nested_ccp/src/ b/juno_samples/nested_ccp/src/
index 9b38476e..83132aca 100644
--- a/juno_samples/nested_ccp/src/
+++ b/juno_samples/nested_ccp/src/
@@ -35,6 +35,6 @@ fn main() {
-fn nested_ccp() {
+fn nested_ccp_test() {