diff --git a/hercules_cg/Cargo.toml b/hercules_cg/Cargo.toml
deleted file mode 100644
index 9464153078b92bc8a9ee6f4c5535c4e435395b2a..0000000000000000000000000000000000000000
--- a/hercules_cg/Cargo.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[package]
-name = "hercules_cg"
-version = "0.1.0"
-authors = ["Russel Arbore <rarbore2@illinois.edu>"]
-
-[dependencies]
-bitvec = "*"
-hercules_ir = { path = "../hercules_ir" }
diff --git a/hercules_cg/src/common.rs b/hercules_cg/src/common.rs
deleted file mode 100644
index 45061aad0ff7c0885756ae8faad2df2550a9d9b8..0000000000000000000000000000000000000000
--- a/hercules_cg/src/common.rs
+++ /dev/null
@@ -1,738 +0,0 @@
-extern crate hercules_ir;
-
-use std::collections::HashMap;
-use std::iter::repeat;
-
-use self::hercules_ir::*;
-
-/*
- * Pretty much all of the codegen functions need to take in some large subset of
- * IR structures, analysis results, and global pieces of information. Package
- * them all in this struct, and make all the codegen functions members of this
- * struct to cut down on the number of function arguments. This structure
- * shouldn't be modified after creation.
- */
-pub(crate) struct FunctionContext<'a> {
-    pub(crate) function: &'a Function,
-    pub(crate) types: &'a Vec<Type>,
-    pub(crate) constants: &'a Vec<Constant>,
-    pub(crate) dynamic_constants: &'a Vec<DynamicConstant>,
-    pub(crate) def_use: &'a ImmutableDefUseMap,
-    pub(crate) reverse_postorder: &'a Vec<NodeID>,
-    pub(crate) typing: &'a Vec<TypeID>,
-    pub(crate) control_subgraph: &'a Subgraph,
-    pub(crate) fork_join_map: &'a HashMap<NodeID, NodeID>,
-    pub(crate) fork_join_nest: &'a HashMap<NodeID, Vec<NodeID>>,
-    pub(crate) antideps: &'a Vec<(NodeID, NodeID)>,
-    pub(crate) bbs: &'a Vec<NodeID>,
-    pub(crate) plan: &'a Plan,
-    pub(crate) llvm_types: &'a Vec<String>,
-    pub(crate) llvm_constants: &'a Vec<String>,
-    pub(crate) llvm_dynamic_constants: &'a Vec<String>,
-    pub(crate) type_sizes_aligns: &'a Vec<(Option<usize>, usize)>,
-    pub(crate) partitions_inverted_map: Vec<Vec<NodeID>>,
-}
-
-impl<'a> FunctionContext<'a> {
-    /*
-     * Find data inputs to a partition.
-     */
-    pub(crate) fn partition_data_inputs(&self, partition_id: PartitionID) -> Vec<NodeID> {
-        let partition = &self.partitions_inverted_map[partition_id.idx()];
-
-        let mut data_inputs: Vec<NodeID> = partition
-            .iter()
-            .map(|id| {
-                // For each node in the partition, filter out the uses that are
-                // data nodes and are in a different partition.
-                get_uses(&self.function.nodes[id.idx()])
-                    .as_ref()
-                    .into_iter()
-                    .filter(|id| {
-                        // Filter out control nodes (just looking for data
-                        // inputs here), check that it's in another partition,
-                        // and ignore parameters, constants, and dynamic
-                        // constants (those are each passed to partition
-                        // functions using different mechanisms).
-                        !self.function.nodes[id.idx()].is_control()
-                            && self.plan.partitions[id.idx()] != partition_id
-                            && !self.function.nodes[id.idx()].is_parameter()
-                            && !self.function.nodes[id.idx()].is_constant()
-                            && !self.function.nodes[id.idx()].is_dynamic_constant()
-                    })
-                    .map(|x| *x)
-                    .collect::<Vec<NodeID>>()
-            })
-            // Collect all such uses across the whole partition.
-            .flatten()
-            .collect();
-
-        // Inputs and outputs of partitions need to be sorted so datums don't
-        // get mixed up.
-        data_inputs.sort();
-        data_inputs
-    }
-
-    /*
-     * Find data outputs of a partition.
-     */
-    pub(crate) fn partition_data_outputs(&self, partition_id: PartitionID) -> Vec<NodeID> {
-        let partition = &self.partitions_inverted_map[partition_id.idx()];
-
-        let mut data_outputs: Vec<NodeID> = partition
-            .iter()
-            .filter(|id| {
-                // For each data node in the partition, check if it has any uses
-                // outside its partition. Users can be control or data nodes.
-                // Also, don't add parameter, constant, and dynamic constant
-                // nodes. These nodes are passed to partition mechanisms using
-                // different mechanism.
-                !self.function.nodes[id.idx()].is_control()
-                    && !self.function.nodes[id.idx()].is_parameter()
-                    && !self.function.nodes[id.idx()].is_constant()
-                    && !self.function.nodes[id.idx()].is_dynamic_constant()
-                    && self
-                        .def_use
-                        .get_users(**id)
-                        .as_ref()
-                        .into_iter()
-                        .filter(|id| self.plan.partitions[id.idx()] != partition_id)
-                        .map(|x| *x)
-                        .count()
-                        > 0
-            })
-            .map(|x| *x)
-            // If this partition contains a return node, the data input of that
-            // node is a data output.
-            .chain(partition.iter().filter_map(|id| {
-                if let Node::Return { control: _, data } = self.function.nodes[id.idx()] {
-                    Some(data)
-                } else {
-                    None
-                }
-            }))
-            .collect();
-
-        // Inputs and outputs of partitions need to be sorted so datums don't
-        // get mixed up.
-        data_outputs.sort();
-        data_outputs
-    }
-
-    /*
-     * Find control nodes that will return from a partition.
-     */
-    pub(crate) fn partition_control_returns(&self, partition_id: PartitionID) -> Vec<NodeID> {
-        let partition = &self.partitions_inverted_map[partition_id.idx()];
-
-        partition
-            .iter()
-            .filter(|id| {
-                // For each control node in the partition, check if it has any
-                // users outside its partition. Users can be control nodes - if
-                // a user in a different partition is a data node, then the
-                // partition is malformed. Return nodes are also unconditionally
-                // a control return of this partition.
-                let outside_user_count = self
-                    .def_use
-                    .get_users(**id)
-                    .as_ref()
-                    .into_iter()
-                    .filter(|user_id| {
-                        // Users of control nodes can only be data nodes
-                        // if they are in the same partition as the
-                        // control node. Only control users may be in a
-                        // different partition.
-                        assert!(
-                            !self.function.nodes[id.idx()].is_control()
-                                || self.function.nodes[user_id.idx()].is_control()
-                                || self.plan.partitions[user_id.idx()] == partition_id
-                        );
-                        self.plan.partitions[user_id.idx()] != partition_id
-                    })
-                    .count();
-
-                // Just calculated for the below assert.
-                let control_user_count = self
-                    .def_use
-                    .get_users(**id)
-                    .as_ref()
-                    .into_iter()
-                    .filter(|id| self.function.nodes[id.idx()].is_control())
-                    .count();
-
-                // A control node cannot have users inside and outside its own
-                // partition. This is because a well-formedness condition of if
-                // and match nodes (the only control nodes allowed to have
-                // multiple users) is their read successors must be in the same
-                // partition as them.
-                assert!(
-                    !self.function.nodes[id.idx()].is_control()
-                        || outside_user_count == 0
-                        || outside_user_count == control_user_count
-                );
-                self.function.nodes[id.idx()].is_control()
-                    && (self.function.nodes[id.idx()].is_return() || outside_user_count > 0)
-            })
-            .map(|x| *x)
-            .collect()
-    }
-
-    /*
-     * Find control successors of a given partition. A partition cannot be a
-     * control successor of itself, since a self-cycle is represented as control
-     * flow within a partiion. In other words, the graph of control flow between
-     * partitions is free of self-loops (an edge connecting a partition to
-     * itself).
-     */
-    pub(crate) fn partition_control_successors(
-        &self,
-        partition_id: PartitionID,
-    ) -> Vec<(PartitionID, NodeID)> {
-        let partition = &self.partitions_inverted_map[partition_id.idx()];
-
-        partition
-            .iter()
-            // Only consider nodes in other partitions that are successors of
-            // control nodes. These are necessarily other control nodes.
-            .filter(|id| self.function.nodes[id.idx()].is_control())
-            .map(|id| {
-                // Get the partitions (that are not this partition) of successor
-                // nodes of control nodes.
-                self.def_use
-                    .get_users(*id)
-                    .as_ref()
-                    .into_iter()
-                    .map(|id| self.plan.partitions[id.idx()])
-                    .filter(|id| *id != partition_id)
-                    .map(move |part_id| (part_id, *id))
-            })
-            // We want a flat list of all such partitions.
-            .flatten()
-            .collect()
-    }
-
-    /*
-     * Calculate the reverse postorder of just this partition.
-     */
-    pub(crate) fn partition_reverse_postorder(&self, partition_id: PartitionID) -> Vec<NodeID> {
-        self.reverse_postorder
-            .iter()
-            .filter(|id| self.plan.partitions[id.idx()] == partition_id)
-            .map(|x| *x)
-            .collect()
-    }
-
-    /*
-     * Determine the array constant inputs to all partition functions. Get the
-     * constant IDs, and the array type IDs. Sort by constant ID for
-     * consistency.
-     */
-    pub(crate) fn partition_array_constant_inputs(&self) -> Vec<(ConstantID, TypeID)> {
-        let mut res = (0..self.constants.len())
-            .filter_map(|idx| {
-                self.constants[idx]
-                    .try_array_type(self.types)
-                    .map(|ty_id| (ConstantID::new(idx), ty_id))
-            })
-            .collect::<Vec<_>>();
-
-        res.sort();
-        res
-    }
-
-    /*
-     * Determine the dynamic constant inputs to all partition functions. Just
-     * assemble the dynamic constant IDs, since the type is always u64. Sort the
-     * parameters for consistency.
-     */
-    pub(crate) fn partition_dynamic_constant_inputs(&self) -> Vec<DynamicConstantID> {
-        let mut res = (0..self.dynamic_constants.len())
-            .filter_map(|idx| {
-                if self.dynamic_constants[idx].is_parameter() {
-                    Some(DynamicConstantID::new(idx))
-                } else {
-                    None
-                }
-            })
-            .collect::<Vec<_>>();
-
-        res.sort();
-        res
-    }
-
-    /*
-     * Determine the array numbers for all the array constants. These are needed
-     * to know which pointer passed to the runtime corresponds to which array
-     * constant. Return a map from constant ID to array number - non-array
-     * constants don't have an array number.
-     */
-    pub(crate) fn array_constant_inputs(&self) -> Vec<Option<u32>> {
-        self.constants
-            .iter()
-            .scan(0, |num, cons| {
-                if cons.try_array_type(self.types).is_some() {
-                    let res = Some(*num);
-                    *num += 1;
-                    Some(res)
-                } else {
-                    Some(None)
-                }
-            })
-            .collect()
-    }
-}
-
-/*
- * When emitting individual nodes in the partition codegen functions, a bunch of
- * partition analysis results are needed. Package them all in this struct, and
- * make all of the subroutines of the top level partition codegen functions
- * members of this struct to cut down on the number of function arguments. This
- * structure shouldn't be modified after creation. This structure only holds per
- * partition specific information - for example, global function parameters,
- * constant parameters, and dynamic constant parameters are not stored, since
- * those don't vary across partitions.
- */
-pub(crate) struct PartitionContext<'a> {
-    pub(crate) function: &'a FunctionContext<'a>,
-    pub(crate) partition_id: PartitionID,
-    pub(crate) top_node: NodeID,
-    pub(crate) data_inputs: Vec<NodeID>,
-    pub(crate) data_outputs: Vec<NodeID>,
-    pub(crate) control_returns: Vec<NodeID>,
-    pub(crate) reverse_postorder: Vec<NodeID>,
-    pub(crate) partition_input_types: Vec<TypeID>,
-    pub(crate) return_type: Type,
-    pub(crate) manifest: PartitionManifest,
-}
-
-impl<'a> PartitionContext<'a> {
-    pub(crate) fn new(
-        function: &'a FunctionContext<'a>,
-        partition_id: PartitionID,
-        top_node: NodeID,
-    ) -> Self {
-        let data_inputs = function.partition_data_inputs(partition_id);
-        let data_outputs = function.partition_data_outputs(partition_id);
-        let control_returns = function.partition_control_returns(partition_id);
-        let control_successors = function.partition_control_successors(partition_id);
-        let reverse_postorder = function.partition_reverse_postorder(partition_id);
-
-        // The data input types are just the types of data nodes used by this
-        // partition, originating in another partition.
-        let partition_input_types = data_inputs
-            .iter()
-            .map(|id| function.typing[id.idx()])
-            .collect();
-
-        // The return struct contains all of the data outputs, plus control
-        // information if there are multiple successor partitions. The control
-        // information is used by the Hercules runtime to implement control flow
-        // between partitions.
-        let multiple_control_successors = control_successors.len() > 1;
-        let output_data_types = data_outputs.iter().map(|id| function.typing[id.idx()]);
-        let return_type = if multiple_control_successors {
-            let u64_ty_id = TypeID::new(
-                function
-                    .types
-                    .iter()
-                    .position(|ty| *ty == Type::UnsignedInteger64)
-                    .unwrap(),
-            );
-            Type::Product(
-                output_data_types
-                    .chain(std::iter::once(u64_ty_id))
-                    .collect(),
-            )
-        } else {
-            Type::Product(output_data_types.collect())
-        };
-
-        // Assemble the manifest.
-        let mut manifest = PartitionManifest::default();
-        manifest.top_node = top_node;
-
-        // The first inputs are the data inputs, from other partitions.
-        manifest
-            .inputs
-            .extend(data_inputs.iter().map(|x| PartitionInput::DataInput(*x)));
-
-        // The next inputs are the function parameters, all in order.
-        manifest.inputs.extend(
-            (0..function.function.param_types.len())
-                .map(|x| PartitionInput::FunctionArgument(x as u32)),
-        );
-
-        // The next inputs are the array constants, all in order. TODO: only
-        // include constant inputs for constants actually used by this function,
-        // not all the constants in the module.
-        manifest.inputs.extend(
-            (0..(function
-                .constants
-                .iter()
-                .filter(|cons| cons.try_array_type(function.types).is_some())
-                .count()))
-                .map(|x| PartitionInput::ArrayConstant(x as u32)),
-        );
-
-        // The last inputs are the dynamic constants, all in order.
-        manifest.inputs.extend(
-            (0..function.function.num_dynamic_constants)
-                .map(|x| PartitionInput::DynamicConstant(x as u32)),
-        );
-
-        // The outputs are the data outputs of this partition.
-        manifest
-            .outputs
-            .extend(data_outputs.iter().map(|x| PartitionOutput::DataOutput(*x)));
-
-        // If there are multiple control returns, also output the node being
-        // returned from.
-        if multiple_control_successors {
-            manifest.outputs.push(PartitionOutput::ControlIndicator);
-        }
-
-        // Store the successor partitions.
-        manifest.successor_partitions = control_successors
-            .into_iter()
-            .map(|(part_id, control_id)| (control_id, part_id))
-            .collect();
-
-        PartitionContext {
-            function,
-            partition_id,
-            top_node,
-            data_inputs,
-            data_outputs,
-            control_returns,
-            reverse_postorder,
-            partition_input_types,
-            return_type,
-            manifest,
-        }
-    }
-}
-
-/*
- * Types, constants, and dynamic constants are fairly simple to translate into
- * LLVM IR.
- */
-
-pub(crate) fn generate_type_string(ty: &Type, llvm_types: &Vec<String>) -> String {
-    match ty {
-        Type::Control(_) => {
-            // Later, we create virtual registers corresponding to fork nodes of
-            // type i64, so we need the "type" of the fork node to be i64.
-            "i64".to_string()
-        }
-        Type::Boolean => "i1".to_string(),
-        Type::Integer8 | Type::UnsignedInteger8 => "i8".to_string(),
-        Type::Integer16 | Type::UnsignedInteger16 => "i16".to_string(),
-        Type::Integer32 | Type::UnsignedInteger32 => "i32".to_string(),
-        Type::Integer64 | Type::UnsignedInteger64 => "i64".to_string(),
-        Type::Float32 => "float".to_string(),
-        Type::Float64 => "double".to_string(),
-        // Because we traverse in bottom-up order, we can assume that the LLVM
-        // types for children types are already computed.
-        Type::Product(fields) => {
-            let mut iter = fields.iter();
-            if let Some(first) = iter.next() {
-                iter.fold("{".to_string() + &llvm_types[first.idx()], |s, f| {
-                    s + ", " + &llvm_types[f.idx()]
-                }) + "}"
-            } else {
-                "{}".to_string()
-            }
-        }
-        Type::Summation(_) => todo!(),
-        Type::Array(_, _) => {
-            // Array types becomes pointers. The element type and dynamic
-            // constant bounds characterize the access code we generate later,
-            // not the type itself.
-            "ptr".to_string()
-        }
-    }
-}
-
-pub(crate) fn generate_type_strings(module: &Module) -> Vec<String> {
-    // Render types into LLVM IR. This requires translating from our interning
-    // structures to LLVM types. We can't just blow through the types vector,
-    // since a type may reference a type ID ahead of it in the vector. Instead,
-    // iterate types in a bottom up order with respect to the type intern DAGs.
-    let mut llvm_types = vec!["".to_string(); module.types.len()];
-    for id in types_bottom_up(&module.types) {
-        llvm_types[id.idx()] = generate_type_string(&module.types[id.idx()], &llvm_types);
-    }
-
-    llvm_types
-}
-
-pub(crate) fn generate_constant_string(
-    cons_id: ConstantID,
-    cons: &Constant,
-    tys: &Vec<Type>,
-    llvm_constants: &Vec<String>,
-) -> String {
-    match cons {
-        Constant::Boolean(val) => {
-            if *val {
-                "true".to_string()
-            } else {
-                "false".to_string()
-            }
-        }
-        Constant::Integer8(val) => format!("{}", val),
-        Constant::Integer16(val) => format!("{}", val),
-        Constant::Integer32(val) => format!("{}", val),
-        Constant::Integer64(val) => format!("{}", val),
-        Constant::UnsignedInteger8(val) => format!("{}", val),
-        Constant::UnsignedInteger16(val) => format!("{}", val),
-        Constant::UnsignedInteger32(val) => format!("{}", val),
-        Constant::UnsignedInteger64(val) => format!("{}", val),
-        Constant::Float32(val) => {
-            if val.fract() == 0.0 {
-                format!("{}.0", val)
-            } else {
-                format!("{}", val)
-            }
-        }
-        Constant::Float64(val) => {
-            if val.fract() == 0.0 {
-                format!("{}.0", val)
-            } else {
-                format!("{}", val)
-            }
-        }
-        Constant::Product(_, _) | Constant::Summation(_, _, _) | Constant::Array(_, _) => {
-            format!("%cons.{}", cons_id.idx())
-        }
-        Constant::Zero(ty_id) => match tys[ty_id.idx()] {
-            Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => {
-                format!("%cons.{}", cons_id.idx())
-            }
-            _ => "zeroinitializer".to_string(),
-        },
-    }
-}
-
-pub(crate) fn generate_constant_strings(module: &Module) -> Vec<String> {
-    // Render constants into LLVM IR. This is done in a very similar manner as
-    // types.
-    let mut llvm_constants = vec!["".to_string(); module.constants.len()];
-    for id in constants_bottom_up(&module.constants) {
-        llvm_constants[id.idx()] = generate_constant_string(
-            id,
-            &module.constants[id.idx()],
-            &module.types,
-            &llvm_constants,
-        );
-    }
-
-    llvm_constants
-}
-
-pub(crate) fn generate_dynamic_constant_strings(module: &Module) -> Vec<String> {
-    // Render dynamic constants into LLVM IR.
-    let mut llvm_dynamic_constants = vec!["".to_string(); module.dynamic_constants.len()];
-    for id in (0..module.dynamic_constants.len()).map(DynamicConstantID::new) {
-        match &module.dynamic_constants[id.idx()] {
-            DynamicConstant::Constant(val) => llvm_dynamic_constants[id.idx()] = format!("{}", val),
-            DynamicConstant::Parameter(_) => {
-                llvm_dynamic_constants[id.idx()] = format!("%dyn_cons.{}", id.idx())
-            }
-        }
-    }
-
-    llvm_dynamic_constants
-}
-
-/*
- * Calculate in-memory size and alignment of a type. The size is optional, since
- * array types with dynamic constant dimensions may not have a compile time
- * known size.
- */
-pub fn type_size_and_alignment(module: &Module, ty: TypeID) -> (Option<usize>, usize) {
-    match module.types[ty.idx()] {
-        Type::Control(_) => {
-            panic!("PANIC: Can't calculate in-memory size and alignment of control type.")
-        }
-        Type::Boolean => (Some(1), 1),
-        Type::Integer8 => (Some(1), 1),
-        Type::Integer16 => (Some(2), 2),
-        Type::Integer32 => (Some(4), 4),
-        Type::Integer64 => (Some(8), 8),
-        Type::UnsignedInteger8 => (Some(1), 1),
-        Type::UnsignedInteger16 => (Some(2), 2),
-        Type::UnsignedInteger32 => (Some(4), 4),
-        Type::UnsignedInteger64 => (Some(8), 8),
-        Type::Float32 => (Some(4), 4),
-        Type::Float64 => (Some(8), 8),
-        Type::Product(ref fields) => {
-            let (size, align) = fields
-                .iter()
-                .map(|ty| type_size_and_alignment(module, *ty))
-                .fold(
-                    (Some(0), 1),
-                    |(acc_size, acc_align), (field_size, field_align)| {
-                        // Alignment of product is maximum alignment of fields.
-                        let new_align = std::cmp::max(acc_align, field_align);
-                        if let (Some(acc_size), Some(field_size)) = (acc_size, field_size) {
-                            // Pre-padding is so that the new field has proper
-                            // alignment within the product.
-                            let mut pre_padding = field_align - acc_size % field_align;
-                            if pre_padding == field_align {
-                                pre_padding = 0;
-                            }
-                            (Some(acc_size + pre_padding + field_size), new_align)
-                        } else {
-                            (None, new_align)
-                        }
-                    },
-                );
-
-            if let Some(size) = size {
-                // Post-padding is so that the overall in-memory size has the
-                // right alignment in an array, and is only done at the end.
-                let mut post_padding = align - size % align;
-                if post_padding == align {
-                    post_padding = 0;
-                }
-                (Some(size + post_padding), align)
-            } else {
-                (None, align)
-            }
-        }
-        Type::Summation(_) => todo!(),
-        Type::Array(elem, ref dims) => {
-            let (maybe_elem_size, elem_align) = type_size_and_alignment(module, elem);
-
-            // We can only calculate the number of elements at compile time if
-            // every dynamic constant dimension is a compile-time constant.
-            let maybe_num_elems = dims.iter().fold(Some(1), |acc, dim| {
-                Some(acc? * module.dynamic_constants[dim.idx()].try_constant()?)
-            });
-
-            // Even if the number of elements is compile-time known, the element
-            // type may have unknown compile-time size.
-            if let (Some(elem_size), Some(num_elems)) = (maybe_elem_size, maybe_num_elems) {
-                (Some(elem_size * num_elems), elem_align)
-            } else {
-                (None, elem_align)
-            }
-        }
-    }
-}
-
-/*
- * Calculate in-memory bytes representing constant. Return the in-memory bytes
- * and the alignment of the constant, if it's non-zero. If it's zero, optionally
- * return the size of the constant, and its alignment. TODO: decide on how to
- * represent memory layouts at the compiler level.
- */
-pub fn embed_constant(module: &Module, cons: ConstantID) -> ConstantBytes {
-    let unchecked = match module.constants[cons.idx()] {
-        // Handle zero constant scalars below.
-        Constant::Boolean(v) => ConstantBytes::NonZero(vec![v as u8], 1),
-        Constant::Integer8(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 1),
-        Constant::Integer16(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 2),
-        Constant::Integer32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4),
-        Constant::Integer64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8),
-        Constant::UnsignedInteger8(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 1),
-        Constant::UnsignedInteger16(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 2),
-        Constant::UnsignedInteger32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4),
-        Constant::UnsignedInteger64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8),
-        Constant::Float32(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 4),
-        Constant::Float64(v) => ConstantBytes::NonZero(v.to_ne_bytes().to_vec(), 8),
-        Constant::Product(ty, ref fields) => {
-            let field_bytes: Vec<ConstantBytes> = fields
-                .iter()
-                .map(|field| embed_constant(module, *field))
-                .collect();
-
-            if field_bytes.iter().all(|cb| {
-                if let ConstantBytes::Zero(_, _) = cb {
-                    true
-                } else {
-                    false
-                }
-            }) {
-                // If all of the fields are zero constants, then this is a zero
-                // constant.
-                let (size, align) = type_size_and_alignment(module, ty);
-                ConstantBytes::Zero(size, align)
-            } else {
-                // We only construct the in-memory bytes if there is a non-zero
-                // bytes somewhere.
-                let (mut bytes, align) = field_bytes.into_iter().fold(
-                    (vec![], 0),
-                    |(mut acc_bytes, acc_align), field| {
-                        // Alignment of product is maximum alignment of fields.
-                        let new_align = std::cmp::max(acc_align, field.align());
-
-                        // Pre-padding is so that the new field has proper
-                        // alignment within the product.
-                        while acc_bytes.len() % field.align() != 0 {
-                            acc_bytes.push(0);
-                        }
-                        match field {
-                            ConstantBytes::NonZero(bytes, _) => acc_bytes.extend(&bytes),
-                            ConstantBytes::Zero(size, _) => acc_bytes.extend(repeat(0).take(size.expect("PANIC: Attempted to embed a zero constant with unknown size into a non-zero constant product. Non-zero constants must have compile-time known size. This is probably because an array field is a zero constant with non-constant dynamic constant dimensions."))),
-                        }
-                        (acc_bytes, new_align)
-                    },
-                );
-
-                // Post-padding is so that the overall in-memory vector has the
-                // right size, and is only done at the end.
-                while bytes.len() % align != 0 {
-                    bytes.push(0);
-                }
-                ConstantBytes::NonZero(bytes, align)
-            }
-        }
-        Constant::Summation(_, _, _) => todo!(),
-        Constant::Array(ty, ref elements) => {
-            let element_bytes: Vec<ConstantBytes> = elements
-                .iter()
-                .map(|element| embed_constant(module, *element))
-                .collect();
-
-            let (size, align) = type_size_and_alignment(module, ty);
-            if element_bytes.iter().all(|cb| {
-                if let ConstantBytes::Zero(_, _) = cb {
-                    true
-                } else {
-                    false
-                }
-            }) {
-                // If all of the fields are zero constants, then this is a zero
-                // constant.
-                ConstantBytes::Zero(size, align)
-            } else {
-                let array_bytes: Vec<u8> = element_bytes
-                    .into_iter()
-                    .map(|cb| match cb {
-                        ConstantBytes::NonZero(bytes, _) => bytes,
-                        ConstantBytes::Zero(size, _) => vec![0; size.expect("PANIC: Attempted to embed a zero constant with unknown size into a non-zero constant array. Non-zero constants must have compile-time known size. This is probably because an array element is a zero constant with non-constant dynamic constant dimensions.")],
-                    })
-                    .flatten()
-                    .collect();
-                assert_eq!(array_bytes.len(), size.expect("PANIC: Size of a non-zero constant array is unknown at compile time. All non-zero constants must have compile time known size."), "PANIC: Size of array type calculated by type_size_and_alignment differs from calculated in-memory byte representation's size.");
-                ConstantBytes::NonZero(array_bytes, align)
-            }
-        }
-        Constant::Zero(ty) => {
-            let (size, align) = type_size_and_alignment(module, ty);
-            ConstantBytes::Zero(size, align)
-        }
-    };
-
-    // Catch all code for making zero constant scalars actually
-    // ConstantBytes::Zero variants.
-    if let ConstantBytes::NonZero(bytes, align) = &unchecked {
-        if module.constants[cons.idx()].is_strictly_scalar() && bytes.iter().all(|x| *x == 0) {
-            return ConstantBytes::Zero(Some(bytes.len()), *align);
-        }
-    }
-    unchecked
-}
diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
deleted file mode 100644
index b806bd502ae84e073b9ca54837e559d6e920b00f..0000000000000000000000000000000000000000
--- a/hercules_cg/src/cpu.rs
+++ /dev/null
@@ -1,1019 +0,0 @@
-extern crate bitvec;
-extern crate hercules_ir;
-
-use std::collections::HashMap;
-use std::collections::VecDeque;
-
-use std::iter::zip;
-
-use std::fmt::Write;
-
-use self::bitvec::prelude::*;
-
-use self::hercules_ir::*;
-
-use crate::*;
-
-/*
- * When assembling LLVM basic blocks, we traverse the nodes in a partition in an
- * ad-hoc order. Thus, we cannot assume block terminators will be visited after
- * data nodes, for example. However, textual LLVM IR requires that the
- * terminator instruction is last. So, we emit nodes into separate strings of
- * LLVM IR that will get stichted together when the block is complete.
- */
-#[derive(Debug)]
-struct LLVMBlock {
-    header: String,
-    phis: String,
-    data: String,
-    terminator: String,
-}
-
-impl<'a> FunctionContext<'a> {
-    /*
-     * Top level function to generate code for a partition, targeting the CPU.
-     */
-    pub(crate) fn codegen_cpu_partition<W: Write>(
-        &self,
-        top_node: NodeID,
-        w: &mut W,
-    ) -> Result<PartitionManifest, std::fmt::Error> {
-        // Step 1: do some analysis to get a bunch of per-partition information.
-        let partition_id = self.plan.partitions[top_node.idx()];
-        let partition_context = PartitionContext::new(self, partition_id, top_node);
-
-        // Step 2: emit the function signature. The partition function
-        // parameters are the function parameters, the partition data inputs,
-        // the array constant pointers, and the dynamic constants.
-        let mut partition_function_parameters = partition_context
-            // The data inputs to this partition. These are the data values
-            // calculated in a different partition in the same function.
-            .partition_input_types
-            .iter()
-            .enumerate()
-            .map(|(idx, ty_id)| {
-                (
-                    self.llvm_types[ty_id.idx()].clone(),
-                    format!("%part_arg.{}", idx),
-                )
-            })
-            // The input types of the overall function.
-            .chain(
-                self.function
-                    .param_types
-                    .iter()
-                    .enumerate()
-                    .map(|(idx, ty_id)| {
-                        (
-                            self.llvm_types[ty_id.idx()].clone(),
-                            format!("%func_arg.{}", idx),
-                        )
-                    }),
-            )
-            // Array constants are passed in, pre-initialized.
-            .chain(
-                self.partition_array_constant_inputs()
-                    .into_iter()
-                    .map(|(id, ty_id)| {
-                        (
-                            self.llvm_types[ty_id.idx()].clone(),
-                            format!("%cons.{}", id.idx()),
-                        )
-                    }),
-            )
-            // Dynamic constants are passed in, since they are only known right
-            // before runtime.
-            .chain(
-                self.partition_dynamic_constant_inputs()
-                    .into_iter()
-                    .map(|id| ("i64".to_string(), format!("%dyn_cons.{}", id.idx()))),
-            );
-
-        write!(
-            w,
-            "define {} @{}_part_{}(",
-            generate_type_string(&partition_context.return_type, &self.llvm_types),
-            self.function.name,
-            partition_id.idx(),
-        )?;
-        let (first_ty, first_param) = partition_function_parameters.next().unwrap();
-        write!(w, "{} {}", first_ty, first_param)?;
-        for (ty, param) in partition_function_parameters {
-            write!(w, ", {} {}", ty, param)?;
-        }
-        write!(w, ") {{\n")?;
-
-        // Step 3: set up basic blocks. A node represents a basic block if its
-        // entry in the basic blocks vector points to itself.
-        let mut llvm_bbs = HashMap::new();
-        for id in &self.partitions_inverted_map[partition_id.idx()] {
-            if self.bbs[id.idx()] == *id {
-                llvm_bbs.insert(
-                    id,
-                    LLVMBlock {
-                        header: format!("bb_{}:\n", id.idx()),
-                        phis: "".to_string(),
-                        data: "".to_string(),
-                        terminator: "".to_string(),
-                    },
-                );
-            }
-        }
-
-        // Step 4: emit nodes. Nodes are emitted into basic blocks separately as
-        // nodes are not necessarily emitted in order. Assemble worklist of
-        // nodes, starting as reverse post order of nodes. For non-phi and non-
-        // reduce nodes, only emit once all data uses are emitted. In addition,
-        // consider additional anti-dependence edges from read to write nodes.
-        let mut visited = bitvec![u8, Lsb0; 0; self.function.nodes.len()];
-        let mut worklist = VecDeque::from(partition_context.reverse_postorder.clone());
-        while let Some(id) = worklist.pop_front() {
-            if !(self.function.nodes[id.idx()].is_phi()
-                || self.function.nodes[id.idx()].is_reduce())
-                && !get_uses(&self.function.nodes[id.idx()])
-                    .as_ref()
-                    .into_iter()
-                    // If this node isn't a phi or reduce, we need to check that
-                    // all uses, as well as all reads we anti-depend with, have
-                    // been emitted.
-                    .chain(self.antideps.iter().filter_map(|(read, write)| {
-                        if id == *write {
-                            Some(read)
-                        } else {
-                            None
-                        }
-                    }))
-                    // Only data dependencies inside this partition need to have
-                    // already been visited.
-                    .all(|id| {
-                        self.plan.partitions[id.idx()] != partition_id
-                            || self.function.nodes[id.idx()].is_control()
-                            || visited[id.idx()]
-                    })
-            {
-                // Skip emitting node if it's not a phi or reduce node and if
-                // its data uses are not emitted yet.
-                worklist.push_back(id);
-            } else {
-                // Once all of the data dependencies for this node are emitted,
-                // this node can be emitted. For reduce nodes specifically, we
-                // want to emit the phi in the fork's basic block, not the
-                // join's, so we handle that ugly case here. This is because
-                // there is a fundamental mismatch between Hercules' notion of
-                // reductions and LLVM's phi nodes. This is ok, since we can
-                // translate between the two. It's just a pain.
-                let bb = if let Node::Reduce {
-                    control,
-                    init: _,
-                    reduct: _,
-                } = self.function.nodes[id.idx()]
-                {
-                    // Figure out the fork corresponding to the associated join.
-                    let fork_id = if let Node::Join { control } = self.function.nodes[control.idx()]
-                    {
-                        if let Type::Control(factors) =
-                            &self.types[self.typing[control.idx()].idx()]
-                        {
-                            *factors.last().unwrap()
-                        } else {
-                            panic!("PANIC: Type of join node associated with reduce node is not a control type.")
-                        }
-                    } else {
-                        panic!("PANIC: Node associated with reduce node isn't a join node.")
-                    };
-
-                    // Emit in the basic block of the fork.
-                    llvm_bbs.get_mut(&self.bbs[fork_id.idx()]).unwrap()
-                } else {
-                    // In the normal case, emit in the basic block the node has
-                    // been actually assigned to.
-                    llvm_bbs.get_mut(&self.bbs[id.idx()]).unwrap()
-                };
-                partition_context.codegen_cpu_node(id, bb)?;
-                visited.set(id.idx(), true);
-            }
-        }
-
-        // Step 5: emit the now completed basic blocks, in order. Emit a dummy
-        // header block to unconditionally jump to the "top" basic block. Also
-        // emit allocas for compile-time known sized constants. TODO: only emit
-        // used constants, not all the constants in the module. TODO: emit sum
-        // constants.
-        write!(w, "bb_header:\n")?;
-        for cons_id in (0..self.constants.len()).map(ConstantID::new) {
-            if let Some(ty_id) = self.constants[cons_id.idx()].try_product_type(&self.types) {
-                if let (Some(size), align) = self.type_sizes_aligns[ty_id.idx()] {
-                    write!(
-                        w,
-                        "  %cons.{} = alloca i8, i32 {}, align {}\n",
-                        cons_id.idx(),
-                        size,
-                        align
-                    )?;
-                }
-            }
-        }
-        write!(w, "  br label %bb_{}\n", top_node.idx())?;
-        for id in partition_context.reverse_postorder {
-            if self.bbs[id.idx()] == id {
-                write!(
-                    w,
-                    "{}{}{}{}",
-                    llvm_bbs[&id].header,
-                    llvm_bbs[&id].phis,
-                    llvm_bbs[&id].data,
-                    llvm_bbs[&id].terminator
-                )?;
-            }
-        }
-
-        // Step 6: close the partition function - we're done. The partition
-        // manifest is created by the partition context.
-        write!(w, "}}\n\n")?;
-        Ok(partition_context.manifest)
-    }
-}
-
-impl<'a> PartitionContext<'a> {
-    /*
-     * Emit LLVM IR implementing a single node.
-     */
-    fn codegen_cpu_node(&self, id: NodeID, bb: &mut LLVMBlock) -> std::fmt::Result {
-        // Helper to emit code to index a collection. All collections are
-        // pointers to some memory at the LLVM IR level. This memory is passed
-        // in as a parameter for anything involving arrays, and is alloca-ed for
-        // product and summation types.
-        // TODO: actually do this ^ for products. Right now, products are still
-        // done at the LLVM struct level w/ GEP and so on. Apologies for anyone
-        // else reading this comment.
-        let mut generate_index_code = |collect: NodeID, indices: &[Index]| -> std::fmt::Result {
-            // Step 1: calculate the list of collection types corresponding to
-            // each index.
-            let mut collection_ty_ids = vec![];
-            let mut curr_ty_id = self.function.typing[collect.idx()];
-            for index in indices {
-                match (index, &self.function.types[curr_ty_id.idx()]) {
-                    (Index::Field(idx), Type::Product(ty_ids))
-                    | (Index::Variant(idx), Type::Summation(ty_ids)) => {
-                        collection_ty_ids.push(curr_ty_id);
-                        curr_ty_id = ty_ids[*idx];
-                    }
-                    (Index::Position(_), Type::Array(elem_ty_id, _)) => {
-                        collection_ty_ids.push(curr_ty_id);
-                        curr_ty_id = *elem_ty_id;
-                    }
-                    _ => {
-                        panic!("PANIC: Found unsupported combination of index and collection type.")
-                    }
-                }
-            }
-            assert!(
-                self.function.types[curr_ty_id.idx()].is_primitive(),
-                "PANIC: Cannot generate partial indexing code."
-            );
-
-            // Step 2: calculate, as LLVM IR values, the stride and offset
-            // needed at each level of the collection. For products, the stride
-            // is calculated using a getelementptr hack (and is the size of the
-            // struct), and the offset corresponds to the field index (which is
-            // translated to an offset using another getelementptr hack). For
-            // arrays, the stride is the dynamic constant extent multiplied by
-            // the stride of the element type, and the offset is the position
-            // index multiplied by the stride of the element type. Additionally,
-            // emit code to add up all of the offsets to get a total offset into
-            // the collection. TODO: to support summations, and arrays in
-            // arbitrary places, we need to not use the hacky getelementptr
-            // technique, since LLVM IR can't represent arrays (in the Hercules
-            // sense) or summations as primitive types. Instead, we need to do
-            // collection memory layout entirely ourselves.
-            let elem_llvm_ty = &self.function.llvm_types[curr_ty_id.idx()];
-            write!(bb.data, "  %index{}.{}.total_offset = add i64 0, 0\n  %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n  %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n",
-                   id.idx(), indices.len(), id.idx(), indices.len(), elem_llvm_ty, id.idx(), indices.len(), id.idx(), indices.len()
-            )?;
-            for (idx, index) in indices.into_iter().enumerate().rev() {
-                match index {
-                    Index::Field(field) => {
-                        let product_llvm_ty =
-                            &self.function.llvm_types[collection_ty_ids[idx].idx()];
-                        write!(
-                            bb.data,
-                            "  %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n  %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n  %index{}.{}.offset.ptrhack = getelementptr {}, ptr null, i64 0, i32 {}\n  %index{}.{}.offset = ptrtoint ptr %index{}.{}.offset.ptrhack to i64\n",
-                            id.idx(), idx,
-                            product_llvm_ty,
-                            id.idx(), idx,
-                            id.idx(), idx,
-                            id.idx(), idx,
-                            product_llvm_ty,
-                            field,
-                            id.idx(), idx,
-                            id.idx(), idx,
-                        )?;
-                    }
-                    Index::Variant(_) => todo!(),
-                    Index::Position(position) => {
-                        let array_extents = self.function.types[collection_ty_ids[idx].idx()]
-                            .try_extents()
-                            .unwrap();
-
-                        // TODO: calculate stride for arrays, needed for arrays
-                        // nested in other collections.
-                        write!(bb.data, "  %index{}.{}.offset.add.0 = add ", id.idx(), idx)?;
-                        self.cpu_emit_use_of_node(position[0], Some(id), true, &mut bb.data)?;
-                        write!(bb.data, ", {}\n", 0)?;
-                        for (dim_idx, (extent_dc_id, position_id)) in
-                            zip(array_extents, position.into_iter()).enumerate().skip(1)
-                        {
-                            write!(
-                                bb.data,
-                                "  %index{}.{}.offset.mul.{} = mul i64 {}, %index{}.{}.offset.add.{}\n",
-                                id.idx(), idx,
-                                dim_idx,
-                                self.function.llvm_dynamic_constants[extent_dc_id.idx()],
-                                id.idx(), idx,
-                                dim_idx - 1
-                            )?;
-                            write!(
-                                bb.data,
-                                "  %index{}.{}.offset.add.{} = add ",
-                                id.idx(),
-                                idx,
-                                dim_idx
-                            )?;
-                            self.cpu_emit_use_of_node(*position_id, Some(id), true, &mut bb.data)?;
-                            write!(
-                                bb.data,
-                                ", %index{}.{}.offset.mul.{}\n",
-                                id.idx(),
-                                idx,
-                                dim_idx
-                            )?;
-                        }
-                        write!(bb.data, "  %index{}.{}.offset = mul i64 %index{}.{}.stride, %index{}.{}.offset.add.{}\n", id.idx(), idx, id.idx(), idx + 1, id.idx(), idx, position.len() - 1)?;
-                    }
-                    Index::Control(_) => panic!(
-                        "PANIC: Found control index when generating collection indexing code."
-                    ),
-                }
-                write!(
-                    bb.data,
-                    "  %index{}.{}.total_offset = add i64 %index{}.{}.total_offset, %index{}.{}.offset\n",
-                    id.idx(), idx,
-                    id.idx(), idx + 1,
-                    id.idx(), idx
-                )?;
-            }
-
-            // Step 3: emit the getelementptr using the total collection offset.
-            write!(bb.data, "  %index{} = getelementptr i8, ptr ", id.idx(),)?;
-            self.cpu_emit_use_of_node(collect, Some(id), false, &mut bb.data)?;
-            write!(bb.data, ", i64 %index{}.0.total_offset\n", id.idx())?;
-
-            Ok(())
-        };
-
-        // Helper to find the basic block corresponding to a particular control
-        // predecessor, for phi nodes. This is needed for when a predecessor
-        // basic block is in a different partition. In this case, the phi's
-        // control predecessor is set to the top block of the partition.
-        let get_phi_predecessor = |pred_id: NodeID| {
-            if self.function.plan.partitions[pred_id.idx()] == self.partition_id {
-                format!("{}", self.function.bbs[pred_id.idx()].idx())
-            } else {
-                format!("header")
-            }
-        };
-
-        // Emit the primary IR for each node.
-        match self.function.function.nodes[id.idx()] {
-            Node::Start | Node::Region { preds: _ } => {
-                // Basic blocks containing a start or region node branch
-                // unconditionally to their single successor.
-                let successor = self
-                    .function
-                    .def_use
-                    .get_users(id)
-                    .iter()
-                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
-                    .next()
-                    .unwrap();
-                bb.terminator = format!("  br label %bb_{}\n", successor.idx());
-            }
-            Node::If { control: _, cond } => {
-                let successors = self.function.def_use.get_users(id);
-
-                // Determine the order of the successors (true/false or false/
-                // true) in the successors slice.
-                let rev = if let Node::Read {
-                    collect: _,
-                    indices,
-                } = &self.function.function.nodes[successors[0].idx()]
-                {
-                    indices[0] != Index::Control(0)
-                } else {
-                    panic!("PANIC: Successor of if node isn't a read node.")
-                };
-                bb.terminator = "  br ".to_string();
-                self.cpu_emit_use_of_node(cond, Some(id), true, &mut bb.terminator)?;
-                write!(
-                    bb.terminator,
-                    ", label %bb_{}, label %bb_{}\n",
-                    successors[(!rev) as usize].idx(),
-                    successors[rev as usize].idx()
-                )?;
-            }
-            Node::Fork { control, factor: _ } => {
-                // Calculate the join and successor.
-                let join = self.function.fork_join_map[&id];
-                let successor = self
-                    .function
-                    .def_use
-                    .get_users(id)
-                    .iter()
-                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
-                    .next()
-                    .unwrap();
-
-                // Create the phi node for the loop index. This is used directly
-                // by any thread ID user nodes. The control predecessor basic
-                // blocks are the control node preceding the fork and the
-                // corresponding join.
-                write!(bb.phis, "  ")?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?;
-                write!(
-                    bb.phis,
-                    " = phi i64 [ 0, %bb_{} ], [ %fork_inc{}, %bb_{} ]\n",
-                    get_phi_predecessor(self.function.bbs[control.idx()]),
-                    id.idx(),
-                    get_phi_predecessor(self.function.bbs[join.idx()]),
-                )?;
-
-                // Increment the loop index by one each iteration.
-                write!(bb.data, "  %fork_inc{} = add i64 1, ", id.idx())?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
-                write!(bb.data, "\n")?;
-
-                // Branch to the successor basic block.
-                write!(
-                    bb.terminator,
-                    "  br label %bb_{}\n",
-                    self.function.bbs[successor.idx()].idx()
-                )?;
-            }
-            Node::Join { control } => {
-                // Get the fork, it's factor, and the successor to this join.
-                let fork_id = if let Type::Control(factors) =
-                    &self.function.types[self.function.typing[control.idx()].idx()]
-                {
-                    *factors.last().unwrap()
-                } else {
-                    panic!("PANIC: The type of a join node is incorrect.")
-                };
-                let factor = if let Node::Fork { control: _, factor } =
-                    &self.function.function.nodes[fork_id.idx()]
-                {
-                    *factor
-                } else {
-                    panic!("PANIC: The node referenced by the control type of a join node is not a fork.")
-                };
-                let successor = self
-                    .function
-                    .def_use
-                    .get_users(id)
-                    .iter()
-                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
-                    .next()
-                    .unwrap();
-
-                // Form the bottom of the loop. Check if the loop is finished,
-                // and branch between the successor and the fork. The structure
-                // of this loop implies that fork-joins have to iterate at least
-                // once. Change the loop termination branch target if this is a
-                // control return (see comment below for more details).
-                let is_control_return = self.control_returns.contains(&id);
-                write!(
-                    bb.terminator,
-                    "  %join_cond{} = icmp ult i64 %fork_inc{}, {}\n",
-                    id.idx(),
-                    fork_id.idx(),
-                    self.function.llvm_dynamic_constants[factor.idx()]
-                )?;
-                write!(
-                    bb.terminator,
-                    "  br i1 %join_cond{}, label %bb_{}, label %bb_{}\n",
-                    id.idx(),
-                    self.function.bbs[fork_id.idx()].idx(),
-                    if is_control_return {
-                        format!("{}_join_cr", id.idx())
-                    } else {
-                        format!("{}", self.function.bbs[successor.idx()].idx())
-                    }
-                )?;
-
-                // Join nodes are the only node that can be a control return
-                // from a partition and generate a conditional branch. This
-                // means we have to do this really ugly hack where we insert
-                // another basic block to be the control return that we
-                // conditionally branch to. Other control nodes that may be
-                // control returns don't have this problem, because they always
-                // unconditionally branch to their destination. We add this LLVM
-                // IR text of a new basic block in the terminator of the current
-                // basic block, since we don't have mutable access here to the
-                // set of all LLVM basic blocks.
-                if is_control_return {
-                    write!(bb.terminator, "bb_{}_join_cr:\n", id.idx())?;
-                }
-            }
-            Node::Phi {
-                control: _,
-                ref data,
-            } => {
-                // For each predecessor of the associated region, we determine
-                // if that predecessor is in this partition or not. If so, then
-                // the predecessor control is just the basic block of the
-                // predecessor control node. If not, the predecessor control is
-                // the first basic block of the partition. The corresponding
-                // datum also needs to be provided by argument to the partition,
-                // and this is handled by cpu_emit_use_of_node.
-                let pred_ids =
-                    get_uses(&self.function.function.nodes[self.function.bbs[id.idx()].idx()]);
-                let mut control_datum_pairs = zip(data.into_iter(), pred_ids.as_ref().iter())
-                    .map(|(datum, pred_id)| (*datum, get_phi_predecessor(*pred_id)));
-
-                // TODO: this code burns my eyes to look at, it might be worth
-                // making this not carcinogenic.
-                write!(bb.phis, "  ")?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?;
-                write!(
-                    bb.phis,
-                    " = phi {} [ ",
-                    self.function.llvm_types[self.function.typing[id.idx()].idx()]
-                )?;
-                let (first_data, first_control) = control_datum_pairs.next().unwrap();
-                self.cpu_emit_use_of_node(first_data, Some(id), false, &mut bb.phis)?;
-                write!(bb.phis, ", %bb_{} ]", first_control)?;
-                for (data, control) in control_datum_pairs {
-                    write!(bb.phis, ", [ ")?;
-                    self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.phis)?;
-                    write!(bb.phis, ", %bb_{} ]", control)?;
-                }
-                write!(bb.phis, "\n")?;
-            }
-            Node::ThreadID { control } => {
-                // Just bitcast the loop index from the fork. The bitcast is a
-                // no-op, but we add it to copy the value from the virtual
-                // register the fork generates to the virtual register
-                // corresponding to this thread ID node.
-                assert!(self.function.function.nodes[control.idx()].is_fork());
-                write!(bb.data, "  ")?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
-                write!(bb.data, " = bitcast i64 ",)?;
-                self.cpu_emit_use_of_node(control, Some(id), false, &mut bb.data)?;
-                write!(bb.data, " to i64\n",)?;
-            }
-            Node::Reduce {
-                control,
-                init,
-                reduct,
-            } => {
-                // Figure out the fork corresponding to the associated join.
-                let fork_id = if let Node::Join { control } =
-                    self.function.function.nodes[control.idx()]
-                {
-                    if let Type::Control(factors) =
-                        &self.function.types[self.function.typing[control.idx()].idx()]
-                    {
-                        *factors.last().unwrap()
-                    } else {
-                        panic!("PANIC: Type of join node associated with reduce node is not a control type.")
-                    }
-                } else {
-                    panic!("PANIC: Node associated with reduce node isn't a join node.")
-                };
-
-                // Figure out the fork's predecessor.
-                let pred = if let Node::Fork { control, factor: _ } =
-                    self.function.function.nodes[fork_id.idx()]
-                {
-                    control
-                } else {
-                    panic!("PANIC: Node referenced in type of join node associated with a reduce node is not a fork node.")
-                };
-
-                // Reduce nodes just lower to phi nodes. We already did the ugly
-                // hack so that "bb" refers to the basic block of the fork,
-                // rather than the join. So, now we just need to emit the phi.
-                write!(bb.phis, "  ")?;
-                self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.phis)?;
-                write!(
-                    bb.phis,
-                    " = phi {} [ ",
-                    self.function.llvm_types[self.function.typing[id.idx()].idx()]
-                )?;
-                self.cpu_emit_use_of_node(init, Some(id), false, &mut bb.phis)?;
-                write!(
-                    bb.phis,
-                    ", %bb_{} ], [ ",
-                    get_phi_predecessor(self.function.bbs[pred.idx()])
-                )?;
-                self.cpu_emit_use_of_node(reduct, Some(id), false, &mut bb.phis)?;
-                write!(
-                    bb.phis,
-                    ", %bb_{} ]\n",
-                    get_phi_predecessor(self.function.bbs[control.idx()])
-                )?;
-            }
-            // These nodes are handled by other mechanisms in the code lowering
-            // process.
-            Node::Return {
-                control: _,
-                data: _,
-            }
-            | Node::Parameter { index: _ }
-            | Node::Constant { id: _ }
-            | Node::DynamicConstant { id: _ } => {}
-            Node::Binary { left, right, op } => {
-                let op = match op {
-                    BinaryOperator::Add => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fadd"
-                        } else {
-                            "add"
-                        }
-                    }
-                    BinaryOperator::Sub => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fsub"
-                        } else {
-                            "sub"
-                        }
-                    }
-                    BinaryOperator::Mul => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fmul"
-                        } else {
-                            "mul"
-                        }
-                    }
-                    BinaryOperator::Div => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fdiv"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "udiv"
-                        } else {
-                            "sdiv"
-                        }
-                    }
-                    BinaryOperator::Rem => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "frem"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "urem"
-                        } else {
-                            "srem"
-                        }
-                    }
-                    BinaryOperator::LT => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp olt"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "icmp ult"
-                        } else {
-                            "icmp slt"
-                        }
-                    }
-                    BinaryOperator::LTE => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp ole"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "icmp ule"
-                        } else {
-                            "icmp sle"
-                        }
-                    }
-                    BinaryOperator::GT => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp ogt"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "icmp ugt"
-                        } else {
-                            "icmp sgt"
-                        }
-                    }
-                    BinaryOperator::GTE => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp oge"
-                        } else if self.function.types[self.function.typing[left.idx()].idx()]
-                            .is_unsigned()
-                        {
-                            "icmp uge"
-                        } else {
-                            "icmp sge"
-                        }
-                    }
-                    BinaryOperator::EQ => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp oeq"
-                        } else {
-                            "icmp eq"
-                        }
-                    }
-                    BinaryOperator::NE => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
-                            "fcmp one"
-                        } else {
-                            "icmp ne"
-                        }
-                    }
-                    BinaryOperator::Or => "or",
-                    BinaryOperator::And => "and",
-                    BinaryOperator::Xor => "xor",
-                    BinaryOperator::LSh => "lsh",
-                    BinaryOperator::RSh => {
-                        if self.function.types[self.function.typing[left.idx()].idx()].is_unsigned()
-                        {
-                            "lshr"
-                        } else {
-                            "ashr"
-                        }
-                    }
-                };
-                write!(bb.data, "  ")?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
-                write!(bb.data, " = {} ", op)?;
-                self.cpu_emit_use_of_node(left, Some(id), true, &mut bb.data)?;
-                write!(bb.data, ", ")?;
-                self.cpu_emit_use_of_node(right, Some(id), false, &mut bb.data)?;
-                write!(bb.data, "\n")?;
-            }
-            Node::Read {
-                collect,
-                ref indices,
-            } => {
-                if self.function.function.nodes[collect.idx()].is_strictly_control() {
-                    // Read nodes may be projection succesors of if or match
-                    // nodes.
-                    let successor = self.function.def_use.get_users(id)[0];
-                    write!(
-                        bb.terminator,
-                        "  br label %bb_{}\n",
-                        self.function.bbs[successor.idx()].idx()
-                    )?;
-                } else {
-                    generate_index_code(collect, indices)?;
-                    write!(bb.data, "  ")?;
-                    self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.data)?;
-                    write!(
-                        bb.data,
-                        " = load {}, ptr %index{}\n",
-                        self.function.llvm_types[self.function.typing[id.idx()].idx()],
-                        id.idx(),
-                    )?;
-                }
-            }
-            Node::Write {
-                collect,
-                data,
-                ref indices,
-            } => {
-                generate_index_code(collect, indices)?;
-                write!(
-                    bb.data,
-                    "  store {} ",
-                    self.function.llvm_types[self.function.typing[data.idx()].idx()]
-                )?;
-                self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.data)?;
-                write!(bb.data, ", ptr %index{}\n", id.idx())?;
-
-                // We can't just "copy" in LLVM IR, but we want to forward the
-                // pointer, unchanged, as the "output" of this write node. The
-                // easiest way to do this is to insert a useless bitcast.
-                write!(bb.data, "  ")?;
-                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
-                write!(bb.data, " = bitcast ptr ")?;
-                self.cpu_emit_use_of_node(collect, Some(id), false, &mut bb.data)?;
-                write!(bb.data, " to ptr\n")?;
-            }
-            _ => {
-                eprintln!("TO LOWER: {:?}", self.function.function.nodes[id.idx()]);
-            }
-        }
-
-        // If this node is a control return, we emit a return from this
-        // partition function.
-        if self.control_returns.contains(&id) {
-            // Get rid of the old terminator, replace with return. Don't do this
-            // if this node is a join node, since in that specific case we
-            // generate specific control return logic. See the join node codegen
-            // above for more details.
-            if !self.function.function.nodes[id.idx()].is_join() {
-                bb.terminator.clear();
-            }
-
-            // Making structs from the aggregated values in LLVM IR is a pain.
-            // We need to, one-by-one, insertvalue each element into the struct.
-            let ret_ty_str = generate_type_string(&self.return_type, &self.function.llvm_types);
-            for (idx, data_output_id) in self.data_outputs.iter().enumerate() {
-                write!(
-                    bb.terminator,
-                    "  %ret_agg{}.{} = insertvalue {} {}, ",
-                    id.idx(),
-                    idx,
-                    ret_ty_str,
-                    if idx == 0 {
-                        "undef".to_string()
-                    } else {
-                        format!("%ret_agg{}.{}", id.idx(), idx - 1)
-                    }
-                )?;
-                let mut data_output_id = *data_output_id;
-
-                // Handle reduce specially here. Technically, the "user" here is
-                // the join node, so cpu_emit_use_of_node would normally emit
-                // the reduce node's virtual register directly. However, if a
-                // data output is the result of a reduce node, that is
-                // definitely outside for the corresponding fork-join. Thus, we
-                // actually need to use the reduction use of the reduce node.
-                // This all only applies if the reduce node is in the current
-                // partition. If not, then use the reduce node as the argument
-                // to cpu_emit_use_of_node as normal, so that the partition
-                // function argument is properly used.
-                while let Node::Reduce {
-                    control: _,
-                    init: _,
-                    reduct,
-                } = self.function.function.nodes[data_output_id.idx()]
-                    && self.partition_id == self.function.plan.partitions[data_output_id.idx()]
-                {
-                    data_output_id = reduct;
-                }
-                self.cpu_emit_use_of_node(data_output_id, None, true, &mut bb.terminator)?;
-                write!(bb.terminator, ", {}\n", idx)?;
-            }
-
-            // Now, we can return the aggregate value we calculated.
-            if self.data_outputs.is_empty() && self.control_returns.len() == 1 {
-                // If there are no data outputs, just return the empty struct.
-                write!(bb.terminator, "  ret {} zeroinitializer\n", ret_ty_str)?;
-            } else if self.data_outputs.is_empty() {
-                // If there are multiple control returns, we need to return the
-                // node ID of the control return, so that the runtime can do
-                // control flow between partitions. In this case, there aren't
-                // any data outputs that also need to be returned.
-                write!(bb.terminator, "  %ret_agg{}.ctrl_pos = insertvalue {} undef, i64 {}, 0\n  ret {} %ret_agg{}.ctrl_pos\n",
-                       id.idx(),
-                       ret_ty_str,
-                       id.idx(),
-                       ret_ty_str,
-                       id.idx()
-                )?;
-            } else if self.control_returns.len() == 1 {
-                // In the normal case, we return the struct containing just the
-                // data outputs.
-                write!(
-                    bb.terminator,
-                    "  ret {} %ret_agg{}.{}\n",
-                    ret_ty_str,
-                    id.idx(),
-                    self.data_outputs.len() - 1,
-                )?;
-            } else {
-                // If there are multiple control returns from this partition and
-                // there are data outputs, we add the control return node ID to
-                // the return aggregate.
-                write!(
-                    bb.terminator,
-                    "  %ret_agg{}.ctrl_pos = insertvalue {} %ret_agg{}.{}, i64 {}, {}\n  ret {} %ret_agg{}.ctrl_pos\n",
-                    id.idx(),
-                    ret_ty_str,
-                    id.idx(),
-                    self.data_outputs.len() - 1,
-                    id.idx(),
-                    self.data_outputs.len(),
-                    ret_ty_str,
-                    id.idx(),
-                )?;
-            }
-        }
-
-        Ok(())
-    }
-
-    /*
-     * Emit the LLVM value corresponding to a node. Optionally prefix with the
-     * LLVM type, which is required by textual LLVM IR in a few places.
-     * Optionally provide the node that will be using this emission. This is
-     * unused by all emitted node values except reduce nodes, which require the
-     * user argument to be given. We chose this interface because at the
-     * callsite of a cpu_emit_use_of_node, it is always known whether this thing
-     * being emitted could (or should) possibly be a reduce node. If not, then
-     * providing none gives a nice early panic when it is a reduce node, either
-     * because the developer misjudged or because there is a bug.
-     */
-    fn cpu_emit_use_of_node<W: Write>(
-        &self,
-        id: NodeID,
-        user: Option<NodeID>,
-        emit_type: bool,
-        w: &mut W,
-    ) -> std::fmt::Result {
-        // First, emit the type before the value (if applicable).
-        if emit_type {
-            write!(
-                w,
-                "{} ",
-                self.function.llvm_types[self.function.typing[id.idx()].idx()]
-            )?;
-        }
-
-        // Emitting the value can be surprisingly complicated, depending on what
-        // the node is. For example, partition arguments are emitted specially.
-        if let Some(input_idx) = self.data_inputs.iter().position(|inp_id| *inp_id == id) {
-            // If a use is in another partition, it needs to get passed to this
-            // partition's function as a parameter.
-            write!(w, "%part_arg.{}", input_idx)?;
-        } else {
-            match self.function.function.nodes[id.idx()] {
-                // Parameter nodes in this partition also represent parameters
-                // to this partition function.
-                Node::Parameter { index } => write!(w, "%func_arg.{}", index)?,
-                // Constants are pre-defined.
-                Node::Constant { id } => write!(w, "{}", self.function.llvm_constants[id.idx()])?,
-                Node::DynamicConstant { id } => {
-                    write!(w, "{}", self.function.llvm_dynamic_constants[id.idx()])?
-                }
-                // Reduce nodes, as usual, are not nice to handle. We need to
-                // emit different LLVM depending on whether the user is inside
-                // or outside the reduce's corresponding fork-join nest. Inside,
-                // we emit as usual, since the user needs to use the phi node
-                // inside the reduction loop. Outside, we need to use the reduct
-                // use of the reduce node, so that we don't grab the reduction
-                // variable one loop iteration too early.
-                Node::Reduce {
-                    control,
-                    init: _,
-                    reduct,
-                } => {
-                    // Figure out the fork corresponding to the associated join.
-                    let fork_id = if let Node::Join { control } =
-                        self.function.function.nodes[control.idx()]
-                    {
-                        if let Type::Control(factors) =
-                            &self.function.types[self.function.typing[control.idx()].idx()]
-                        {
-                            *factors.last().unwrap()
-                        } else {
-                            panic!()
-                        }
-                    } else {
-                        panic!()
-                    };
-
-                    // Check if the basic block containing the user node is in
-                    // the fork-join nest for this reduce node. We make the user
-                    // node an optional argument as a debugging tool - if we
-                    // exercise this code branch when generating the code for a
-                    // node that absolutely should not be using the result of a
-                    // reduce node, we would like to know!
-                    if self.function.fork_join_nest[&self.function.bbs[user.expect("PANIC: cpu_emit_use_of_node was called on a reduce node, but no user node ID was given.").idx()]]
-                        .contains(&fork_id)
-                    {
-                        // If the user is inside the fork-join nest, then emit
-                        // the reduce node directly.
-                        assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]);
-                        write!(w, "%virt.{}", id.idx())?;
-                    } else {
-                        // If the user is outside the fork-join nest, then
-                        // recursively emit on the reduction input to the reduce
-                        // node. This is needed when there is a reduce chain.
-                        assert_eq!(
-                            self.partition_id,
-                            self.function.plan.partitions[reduct.idx()]
-                        );
-                        self.cpu_emit_use_of_node(reduct, user, emit_type, w)?;
-                    }
-                }
-                // Uses that are in this partition are just virtual registers.
-                // Clang is really annoying about numbering virtual registers,
-                // so to avoid that silliness we prepend all our virtual
-                // registers with a prefix indicating what kind of thing it is.
-                // For normal values, we use "virt" for virtual register.
-                _ => {
-                    assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]);
-                    write!(w, "%virt.{}", id.idx())?;
-                }
-            }
-        }
-
-        Ok(())
-    }
-}
diff --git a/hercules_cg/src/lib.rs b/hercules_cg/src/lib.rs
deleted file mode 100644
index c33fd857ca71b4c82135d7ce933ea441d3ece4f4..0000000000000000000000000000000000000000
--- a/hercules_cg/src/lib.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-#![feature(let_chains)]
-
-pub mod common;
-pub mod cpu;
-pub mod top;
-
-pub use crate::common::*;
-pub use crate::cpu::*;
-pub use crate::top::*;
diff --git a/hercules_cg/src/top.rs b/hercules_cg/src/top.rs
deleted file mode 100644
index 2da69355803a695e2ef2266e51b00ce3f15ac0f9..0000000000000000000000000000000000000000
--- a/hercules_cg/src/top.rs
+++ /dev/null
@@ -1,204 +0,0 @@
-extern crate hercules_ir;
-
-use std::collections::HashMap;
-use std::fmt::Write;
-
-use self::hercules_ir::*;
-
-use crate::*;
-
-/*
- * Top level function to generate code for a module. Emits LLVM IR text. Calls
- * out to backends to generate code for individual partitions. Creates a
- * manifest describing the generated code.
- */
-pub fn codegen<W: Write>(
-    module: &Module,
-    def_uses: &Vec<ImmutableDefUseMap>,
-    reverse_postorders: &Vec<Vec<NodeID>>,
-    typing: &ModuleTyping,
-    control_subgraphs: &Vec<Subgraph>,
-    fork_join_maps: &Vec<HashMap<NodeID, NodeID>>,
-    fork_join_nests: &Vec<HashMap<NodeID, Vec<NodeID>>>,
-    antideps: &Vec<Vec<(NodeID, NodeID)>>,
-    bbs: &Vec<Vec<NodeID>>,
-    plans: &Vec<Plan>,
-    w: &mut W,
-) -> Result<ModuleManifest, std::fmt::Error> {
-    // Render types, constants, and dynamic constants into LLVM IR.
-    let llvm_types = generate_type_strings(module);
-    let llvm_constants = generate_constant_strings(module);
-    let llvm_dynamic_constants = generate_dynamic_constant_strings(module);
-    let type_sizes_aligns = (0..module.types.len())
-        .map(|idx| {
-            if module.types[idx].is_control() {
-                (None, 0)
-            } else {
-                type_size_and_alignment(module, TypeID::new(idx))
-            }
-        })
-        .collect();
-
-    // Generate a dummy uninitialized global - this is needed so that there'll
-    // be a non-empty .bss section in the ELF object file.
-    write!(w, "@dummy = dso_local global i32 0, align 4\n")?;
-
-    // Do codegen for each function individually. Get each function's manifest.
-    let mut manifests = vec![];
-    for function_idx in 0..module.functions.len() {
-        // There's a bunch of per-function information we use.
-        let context = FunctionContext {
-            function: &module.functions[function_idx],
-            types: &module.types,
-            constants: &module.constants,
-            dynamic_constants: &module.dynamic_constants,
-            def_use: &def_uses[function_idx],
-            reverse_postorder: &reverse_postorders[function_idx],
-            typing: &typing[function_idx],
-            control_subgraph: &control_subgraphs[function_idx],
-            fork_join_map: &fork_join_maps[function_idx],
-            fork_join_nest: &fork_join_nests[function_idx],
-            antideps: &antideps[function_idx],
-            bbs: &bbs[function_idx],
-            plan: &plans[function_idx],
-            llvm_types: &llvm_types,
-            llvm_constants: &llvm_constants,
-            llvm_dynamic_constants: &llvm_dynamic_constants,
-            type_sizes_aligns: &type_sizes_aligns,
-            partitions_inverted_map: plans[function_idx].invert_partition_map(),
-        };
-
-        manifests.push(context.codegen_function(w)?);
-    }
-
-    // Assemble the manifest for the whole module.
-    Ok(ModuleManifest {
-        functions: manifests,
-        types: module.types.clone(),
-        type_sizes_aligns,
-        dynamic_constants: module.dynamic_constants.clone(),
-        // Get the types of all of the constants. This requires collecting over
-        // all of the functions, since the calculated types of constants may be
-        // distributed over many functions. This may contain duplicate mappings,
-        // but this should be fine for our purposes, since the mappings
-        // shouldn't conflict.
-        constant_types: module
-            .functions
-            .iter()
-            .enumerate()
-            .map(|(func_idx, function)| {
-                function
-                    .nodes
-                    .iter()
-                    .enumerate()
-                    .filter_map(move |(idx, node)| {
-                        Some((node.try_constant()?, typing[func_idx][idx]))
-                    })
-            })
-            .flatten()
-            .collect(),
-        array_constants: (0..module.constants.len())
-            .map(ConstantID::new)
-            .filter_map(|cons_id| {
-                if module.constants[cons_id.idx()]
-                    .try_array_type(&module.types)
-                    .is_some()
-                {
-                    Some(embed_constant(module, cons_id))
-                } else {
-                    None
-                }
-            })
-            .collect(),
-        array_cons_ids: (0..module.constants.len())
-            .map(ConstantID::new)
-            .filter(|id| {
-                module.constants[id.idx()]
-                    .try_array_type(&module.types)
-                    .is_some()
-            })
-            .collect(),
-    })
-}
-
-impl<'a> FunctionContext<'a> {
-    /*
-     * Each function gets codegened separately.
-     */
-    fn codegen_function<W: Write>(&self, w: &mut W) -> Result<FunctionManifest, std::fmt::Error> {
-        // Find the "top" control node of each partition. One well-formedness
-        // condition of partitions is that there is exactly one "top" control
-        // node.
-        let top_nodes: Vec<NodeID> = self
-            .partitions_inverted_map
-            .iter()
-            .enumerate()
-            .map(|(part_idx, part)| {
-                // For each partition, find the "top" node.
-                *part
-                    .iter()
-                    .filter(move |id| {
-                        // The "top" node is a control node having at least one
-                        // control predecessor in another partition, or is a
-                        // start node. Every predecessor in the control subgraph
-                        // is a control node.
-                        self.function.nodes[id.idx()].is_start()
-                            || (self.function.nodes[id.idx()].is_control()
-                                && self
-                                    .control_subgraph
-                                    .preds(**id)
-                                    .filter(|pred_id| {
-                                        self.plan.partitions[pred_id.idx()].idx() != part_idx
-                                    })
-                                    .count()
-                                    > 0)
-                    })
-                    .next()
-                    .unwrap()
-            })
-            .collect();
-
-        // Collect all the node IDs that are values returned by this function.
-        let returned_values = self
-            .function
-            .nodes
-            .iter()
-            .filter_map(|node| node.try_return().map(|(_, data)| data))
-            .collect();
-
-        // Get the partition ID of the start node.
-        let top_partition = self.plan.partitions[0];
-
-        // Generate code for each individual partition. This generates a single
-        // LLVM function per partition. These functions will be called in async
-        // tasks by the Hercules runtime.
-        assert_eq!(self.plan.num_partitions, top_nodes.len());
-        let mut manifests = vec![];
-        for part_idx in 0..self.plan.num_partitions {
-            match self.plan.partition_devices[part_idx] {
-                Device::CPU => manifests.push(self.codegen_cpu_partition(top_nodes[part_idx], w)?),
-                Device::GPU => todo!(),
-            }
-        }
-
-        // Assemble the manifest for the whole function.
-        Ok(FunctionManifest {
-            name: self.function.name.clone(),
-            param_types: self.function.param_types.clone(),
-            return_type: self.function.return_type,
-            typing: self.typing.clone(),
-            used_constants: self
-                .function
-                .nodes
-                .iter()
-                .filter_map(|node| node.try_constant())
-                .collect(),
-            num_dynamic_constant_parameters: self.function.num_dynamic_constants,
-            partitions: manifests,
-            // TODO: populate dynamic constant rules.
-            dynamic_constant_rules: vec![],
-            top_partition,
-            returned_values,
-        })
-    }
-}
diff --git a/hercules_ir/src/build.rs b/hercules_ir/src/build.rs
index b451dcb8c3da2b7f0399a7613927357243e0b54a..cfc59a2b4ce5283fdc709371cba21ec17c2e0f0c 100644
--- a/hercules_ir/src/build.rs
+++ b/hercules_ir/src/build.rs
@@ -354,24 +354,14 @@ impl<'a> Builder<'a> {
     pub fn create_constant_array(
         &mut self,
         elem_ty: TypeID,
-        cons: Box<[ConstantID]>,
         extents: Box<[u32]>,
     ) -> BuilderResult<ConstantID> {
-        for con in cons.iter() {
-            if self.constant_types[con.idx()] != elem_ty {
-                Err("Constant provided to create_constant_array has a different type than the provided element type.")?
-            }
-        }
         let extents = extents
             .iter()
             .map(|extent| self.create_dynamic_constant_constant(*extent as usize))
             .collect();
         let ty = self.create_type_array(elem_ty, extents);
-        Ok(self.intern_constant(Constant::Array(ty, cons), ty))
-    }
-
-    pub fn create_constant_zero(&mut self, typ : TypeID) -> ConstantID {
-        self.intern_constant(Constant::Zero(typ), typ)
+        Ok(self.intern_constant(Constant::Array(ty), ty))
     }
 
     pub fn create_dynamic_constant_constant(&mut self, val: usize) -> DynamicConstantID {
diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs
index c7462613847ab73aad421aa105a8e4a81b449040..bde6be4ad8789b14c612e6e1f6341483faefff29 100644
--- a/hercules_ir/src/dataflow.rs
+++ b/hercules_ir/src/dataflow.rs
@@ -375,10 +375,9 @@ pub fn immediate_control_flow(
             .into_iter()
             .fold(UnionNodeSet::top(), |a, b| UnionNodeSet::meet(&a, b));
     }
-    let node = &function.nodes[node_id.idx()];
 
     // Step 2: clear all bits and set bit for current node, if applicable.
-    if node.is_control() {
+    if function.nodes[node_id.idx()].is_control() {
         let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()];
         singular.set(node_id.idx(), true);
         out = UnionNodeSet::Bits(singular);
diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs
index 63053842406ed696ca68570323d8994aa904cac6..32f3a5787d1e273d6f393a95bc08b31b73038f8c 100644
--- a/hercules_ir/src/dot.rs
+++ b/hercules_ir/src/dot.rs
@@ -20,7 +20,9 @@ pub fn xdot_module(
     reverse_postorders: &Vec<Vec<NodeID>>,
     doms: Option<&Vec<DomTree>>,
     fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>,
+    bbs: Option<&Vec<Vec<NodeID>>>,
     plans: Option<&Vec<Plan>>,
+    fork_join_placements: Option<&Vec<Vec<ForkJoinPlacement>>>,
 ) {
     let mut tmp_path = temp_dir();
     let mut rng = rand::thread_rng();
@@ -33,7 +35,9 @@ pub fn xdot_module(
         &reverse_postorders,
         doms,
         fork_join_maps,
+        bbs,
         plans,
+        fork_join_placements,
         &mut contents,
     )
     .expect("PANIC: Unable to generate output file contents.");
@@ -54,7 +58,9 @@ pub fn write_dot<W: Write>(
     reverse_postorders: &Vec<Vec<NodeID>>,
     doms: Option<&Vec<DomTree>>,
     fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>,
+    bbs: Option<&Vec<Vec<NodeID>>>,
     plans: Option<&Vec<Plan>>,
+    fork_join_placements: Option<&Vec<Vec<ForkJoinPlacement>>>,
     w: &mut W,
 ) -> std::fmt::Result {
     write_digraph_header(w)?;
@@ -190,6 +196,66 @@ pub fn write_dot<W: Write>(
             }
         }
 
+        // Step 4: draw BB edges in light magenta.
+        if let Some(bbs) = bbs {
+            let bbs = &bbs[function_id.idx()];
+            for node_idx in 0..bbs.len() {
+                let maybe_data = NodeID::new(node_idx);
+                let control = bbs[node_idx];
+                if maybe_data != control {
+                    write_edge(
+                        maybe_data,
+                        function_id,
+                        control,
+                        function_id,
+                        true,
+                        "olivedrab4, constraint=false",
+                        "dotted",
+                        &module,
+                        w,
+                    )?;
+                }
+            }
+        }
+
+        // Step 5: draw fork-join placement edges in purple.
+        if let Some(fork_join_placements) = fork_join_placements {
+            let fork_join_map = &fork_join_maps.unwrap()[function_id.idx()];
+            let fork_join_placement = &fork_join_placements[function_id.idx()];
+            for node_idx in 0..fork_join_placement.len() {
+                let node_id = NodeID::new(node_idx);
+                match fork_join_placement[node_id.idx()] {
+                    ForkJoinPlacement::Sequential => {}
+                    ForkJoinPlacement::Fork(fork_id) => {
+                        write_edge(
+                            node_id,
+                            function_id,
+                            fork_id,
+                            function_id,
+                            true,
+                            "purple, constraint=false",
+                            "dotted",
+                            &module,
+                            w,
+                        )?;
+                    }
+                    ForkJoinPlacement::Reduce(fork_id) => {
+                        write_edge(
+                            node_id,
+                            function_id,
+                            fork_join_map[&fork_id],
+                            function_id,
+                            true,
+                            "purple, constraint=false",
+                            "dotted",
+                            &module,
+                            w,
+                        )?;
+                    }
+                }
+            }
+        }
+
         write_graph_footer(w)?;
     }
 
diff --git a/hercules_ir/src/gcm.rs b/hercules_ir/src/gcm.rs
index 60e7935852fea297d6cce4b86d42edbbf635228a..9da269885c84cf802eba561eb0aad9334ed21103 100644
--- a/hercules_ir/src/gcm.rs
+++ b/hercules_ir/src/gcm.rs
@@ -1,4 +1,8 @@
-use std::collections::HashMap;
+extern crate bitvec;
+
+use std::collections::{HashMap, VecDeque};
+
+use self::bitvec::prelude::*;
 
 use crate::*;
 
@@ -54,7 +58,7 @@ pub fn gcm(
                 .unwrap_or(highest);
 
             // If the ancestor of the control users isn't below the lowest
-            // control use, then just place in the loewst control use.
+            // control use, then just place in the lowest control use.
             if !dom.does_dom(highest, lowest) {
                 highest
             } else {
@@ -76,6 +80,51 @@ pub fn gcm(
                     }
                 }
 
+                // If the assigned location is a join and this node doesn't use
+                // a reduce from that join, we actually want to place these
+                // nodes in the predecessor of the join, so that the code will
+                // get executed in parallel.
+                if let Some(control) = function.nodes[location.idx()].try_join()
+                    && location != NodeID::new(idx)
+                {
+                    // Set up BFS to find reduce nodes.
+                    let mut bfs = VecDeque::new();
+                    let mut bfs_visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
+                    bfs.push_back(NodeID::new(idx));
+                    bfs_visited.set(idx, true);
+                    let mut found_reduce = false;
+                    'bfs: while let Some(id) = bfs.pop_front() {
+                        for use_id in get_uses(&function.nodes[id.idx()]).as_ref() {
+                            // If we find a reduce, check that it's attached to
+                            // the join we care about.
+                            if let Some((join, _, _)) = function.nodes[use_id.idx()].try_reduce()
+                                && join == location
+                            {
+                                found_reduce = true;
+                                break 'bfs;
+                            }
+
+                            // Only go through data nodes.
+                            if bfs_visited[use_id.idx()]
+                                || function.nodes[use_id.idx()].is_control()
+                            {
+                                continue;
+                            }
+
+                            bfs.push_back(*use_id);
+                            bfs_visited.set(use_id.idx(), true);
+                        }
+                    }
+
+                    // If we don't depend on the reduce, we're not in a cycle
+                    // with the reduce. Therefore, we should be scheduled to the
+                    // predecessor of the join, since this code can run in
+                    // parallel.
+                    if !found_reduce {
+                        location = control;
+                    }
+                }
+
                 location
             }
         })
@@ -104,10 +153,185 @@ pub fn compute_fork_join_nesting(
             (
                 id,
                 dom.ascend(id)
+                    // Filter for forks that dominate this control node,
                     .filter(|id| function.nodes[id.idx()].is_fork())
+                    // where its corresponding join doesn't dominate the control
+                    // node (if so, then this control is after the fork-join).
                     .filter(|fork_id| !dom.does_prop_dom(fork_join_map[&fork_id], id))
                     .collect(),
             )
         })
         .collect()
 }
+
+/*
+ * Find all the reduce-cycles in a function.
+ */
+pub fn compute_reduce_cycles(function: &Function) -> HashMap<NodeID, Vec<NodeID>> {
+    let mut result = HashMap::new();
+    let mut dfs_visited = bitvec![u8, Lsb0; 0; function.nodes.len()];
+
+    for id in (0..function.nodes.len()).map(NodeID::new) {
+        if let Node::Reduce {
+            control: _,
+            init: _,
+            reduct,
+        } = &function.nodes[id.idx()]
+        {
+            // DFS to find data cycle "rooted" at reduce.
+            dfs_visited.fill(false);
+            dfs_visited.set(id.idx(), true);
+            // The stack starts with the reduce node itself and the `reduct` use
+            // of the reduce node.
+            let mut dfs_stack = vec![(id, 0), (*reduct, 0)];
+            'dfs: while let Some((node_id, use_idx)) = dfs_stack.pop() {
+                if node_id == id {
+                    // If we returned to the reduce node, then there is no
+                    // cycle. This will be signified by an empty vector in the
+                    // return map.
+                    break;
+                }
+
+                dfs_visited.set(node_id.idx(), true);
+
+                // If there are further uses...
+                let uses = get_uses(&function.nodes[node_id.idx()]);
+                if use_idx < uses.as_ref().len() {
+                    // Push ourselves back on to the stack.
+                    dfs_stack.push((node_id, use_idx + 1));
+
+                    // Check if the use is a data node.
+                    let use_id = uses.as_ref()[use_idx];
+                    if !function.nodes[use_id.idx()].is_control() {
+                        // If so, check if the next use was already visited.
+                        if !dfs_visited[use_id.idx()] {
+                            // If not, add the use to the stack.
+                            dfs_stack.push((use_id, 0));
+                        } else if dfs_stack.iter().any(|(id, _)| *id == use_id) {
+                            // If so, and the use is already in the stack, we've
+                            // found a cycle - if the already visited node we
+                            // found isn't the reduce, then there's a cycle not
+                            // involving the reduce, which isn't valid.
+                            assert_eq!(
+                                id, use_id,
+                                "PANIC: Found cycle not containing expected reduce node."
+                            );
+                            break 'dfs;
+                        }
+                    }
+                }
+            }
+
+            result.insert(id, dfs_stack.into_iter().map(|(id, _)| id).collect());
+        }
+    }
+
+    result
+}
+
+pub fn invert_reduce_cycles(
+    function: &Function,
+    reduce_cycles: &HashMap<NodeID, Vec<NodeID>>,
+    join_fork_map: &HashMap<NodeID, NodeID>,
+    fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
+) -> Vec<Option<NodeID>> {
+    let mut result: Vec<Option<NodeID>> = vec![None; function.nodes.len()];
+
+    for (reduce, in_cycle) in reduce_cycles {
+        for node in in_cycle {
+            if let Some(old_reduce) = result[node.idx()] {
+                // A node may be in multiple reduce cycles when there are nested
+                // fork-joins. In such cases, we pick the more "deeply nested"
+                // reduce cycle.
+                let old_join_id = function.nodes[old_reduce.idx()].try_reduce().unwrap().0;
+                let old_fork_id = join_fork_map[&old_join_id];
+                let new_join_id = function.nodes[reduce.idx()].try_reduce().unwrap().0;
+                let new_fork_id = join_fork_map[&new_join_id];
+
+                let old_above_new = fork_join_nest[&new_fork_id].contains(&old_fork_id);
+                let new_above_old = fork_join_nest[&old_fork_id].contains(&new_fork_id);
+                assert!(old_above_new ^ new_above_old, "PANIC: A node can only be in reduce cycles that are hierarchically related and from different fork-joins.");
+                if old_above_new {
+                    result[node.idx()] = Some(*reduce);
+                }
+            } else {
+                result[node.idx()] = Some(*reduce);
+            }
+        }
+    }
+
+    result
+}
+
+/*
+ * Description of a node's placement amongst fork-joins, generated per-node by
+ * `compute_fork_join_placement`.
+ */
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum ForkJoinPlacement {
+    // The node is not "in" any fork-joins.
+    Sequential,
+    // The node is in the "fork" section of the fork/join, marked by fork ID.
+    Fork(NodeID),
+    // The node is in the "reduce" section of the fork/join, marked by fork ID.
+    Reduce(NodeID),
+}
+
+/*
+ * Find which fork/join each data node is "inside" of, based off of basic block
+ * scheduling (global code motion information). A data node is either not a part
+ * of any fork/join, is a part of the "fork" section of a fork/join, or is a
+ * part of the "reduce" section of a fork/join. The following conditions are
+ * applied in order to determine which category a data node is in:
+ * 1. If a data node is contained in a cycle containing a reduce node OR is
+ *    scheduled to the basic block of the join node of the fork/join, the data
+ *    node is in the "reduce" section of that fork/join (more specifically, the
+ *    most deeply nested such reduce node). Otherwise...
+ * 2. If a data node is scheduled to a control node inside a fork/join, the data
+ *    node is in the "fork" section of that fork/join (more specifically, the
+ *    most deeply nested such fork/join). Otherwise...
+ * 3. If a data node is not in a "reduce" or "fork" section of any fork/join, it
+ *    is a "sequential" node.
+ */
+pub fn compute_fork_join_placement(
+    function: &Function,
+    fork_join_map: &HashMap<NodeID, NodeID>,
+    fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
+    bbs: &Vec<NodeID>,
+) -> Vec<ForkJoinPlacement> {
+    let mut result = vec![ForkJoinPlacement::Sequential; function.nodes.len()];
+    let join_fork_map = fork_join_map
+        .into_iter()
+        .map(|(fork, join)| (*join, *fork))
+        .collect::<HashMap<_, _>>();
+    let reduce_cycles = compute_reduce_cycles(function);
+    let inverted_reduce_cycles =
+        invert_reduce_cycles(function, &reduce_cycles, &join_fork_map, fork_join_nest);
+
+    for id in (0..function.nodes.len()).map(NodeID::new) {
+        // Check condition #1.
+        if let Some(reduce_id) = &inverted_reduce_cycles[id.idx()] {
+            let join_id = function.nodes[reduce_id.idx()].try_reduce().unwrap().0;
+            let fork_id = join_fork_map[&join_id];
+            result[id.idx()] = ForkJoinPlacement::Reduce(fork_id);
+            continue;
+        }
+
+        if let Some(fork_id) = join_fork_map.get(&bbs[id.idx()]) {
+            result[id.idx()] = ForkJoinPlacement::Reduce(*fork_id);
+            continue;
+        }
+
+        // Check condition #2.
+        let forks = &fork_join_nest[&bbs[id.idx()]];
+        if let Some(fork_id) = forks.get(0) {
+            result[id.idx()] = ForkJoinPlacement::Fork(*fork_id);
+            continue;
+        }
+
+        // Default to condition #3.
+        result[id.idx()] = ForkJoinPlacement::Sequential;
+    }
+
+    result
+}
diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs
index ea9c07205029717d700700e8b97db76cd07e989f..688902b6be01c478202adcadac1e94ce5de0e304 100644
--- a/hercules_ir/src/ir.rs
+++ b/hercules_ir/src/ir.rs
@@ -31,9 +31,9 @@ pub struct Module {
  * A function has a name, a list of types for its parameters, a single return
  * type, a list of nodes in its sea-of-nodes style IR, and a number of dynamic
  * constants. When calling a function, arguments matching the parameter types
- * are required, as well as the correct number of dynamic constants. All
- * dynamic constants are 64-bit unsigned integers (usize / u64), so it is
- * sufficient to merely store how many of them the function takes as arguments.
+ * are required, as well as the correct number of dynamic constants. All dynamic
+ * constants are 64-bit unsigned integers (usize / u64), so it is sufficient to
+ * just store how many of them the function takes as arguments.
  */
 #[derive(Debug, Clone)]
 pub struct Function {
@@ -78,11 +78,7 @@ pub enum Type {
  * Constants are pretty standard in Hercules IR. Float constants used the
  * ordered_float crate so that constants can be keys in maps (used for
  * interning constants during IR construction). Product, summation, and array
- * constants all contain their own type. This is only strictly necessary for
- * summation types, but provides a nice mechanism for sanity checking for
- * product and array types as well. There is also a zero initializer constant,
- * which stores its own type as well. The zero value of a summation is defined
- * as the zero value of the first variant.
+ * constants all contain their own type.
  */
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Constant {
@@ -99,8 +95,8 @@ pub enum Constant {
     Float64(ordered_float::OrderedFloat<f64>),
     Product(TypeID, Box<[ConstantID]>),
     Summation(TypeID, u32, ConstantID),
-    Array(TypeID, Box<[ConstantID]>),
-    Zero(TypeID),
+    // Array constants are always zero.
+    Array(TypeID),
 }
 
 /*
@@ -345,18 +341,9 @@ impl Module {
                 self.write_constant(*field, w)?;
                 write!(w, ")")
             }
-            Constant::Array(_, elems) => {
-                write!(w, "[")?;
-                for idx in 0..elems.len() {
-                    let elem_cons_id = elems[idx];
-                    self.write_constant(elem_cons_id, w)?;
-                    if idx + 1 < elems.len() {
-                        write!(w, ", ")?;
-                    }
-                }
-                write!(w, "]")
+            Constant::Array(_) => {
+                write!(w, "[]")
             }
-            Constant::Zero(_) => write!(w, "zero"),
         }?;
 
         Ok(())
@@ -374,18 +361,6 @@ impl Module {
 
         Ok(())
     }
-
-    /*
-     * Unfortunately, determining if a constant is an array requires both
-     * knowledge of constants and types, due to zero initializer constants.
-     */
-    pub fn is_array_constant(&self, cons_id: ConstantID) -> bool {
-        if let Constant::Zero(ty_id) = self.constants[cons_id.idx()] {
-            self.types[ty_id.idx()].is_array()
-        } else {
-            self.constants[cons_id.idx()].is_strictly_array()
-        }
-    }
 }
 
 /*
@@ -463,7 +438,7 @@ pub fn constants_bottom_up(constants: &Vec<Constant>) -> impl Iterator<Item = Co
                 continue;
             }
             match &constants[id.idx()] {
-                Constant::Product(_, children) | Constant::Array(_, children) => {
+                Constant::Product(_, children) => {
                     // We have to yield the children of this node before
                     // this node itself. We keep track of which nodes have
                     // yielded using visited.
@@ -693,16 +668,9 @@ impl Type {
     }
 }
 
-pub fn element_type(mut ty: TypeID, types: &Vec<Type>) -> TypeID {
-    while let Type::Array(elem, _) = types[ty.idx()] {
-        ty = elem;
-    }
-    ty
-}
-
 impl Constant {
-    pub fn is_strictly_array(&self) -> bool {
-        if let Constant::Array(_, _) = self {
+    pub fn is_array(&self) -> bool {
+        if let Constant::Array(_) = self {
             true
         } else {
             false
@@ -711,66 +679,23 @@ impl Constant {
 
     // A zero constant may need to return constants that don't exist yet, so we
     // need mutable access to the constants array.
-    pub fn try_product_fields(
-        &self,
-        types: &[Type],
-        constants: &mut Vec<Constant>,
-    ) -> Option<Vec<ConstantID>> {
+    pub fn try_product_fields(&self) -> Option<Vec<ConstantID>> {
         match self {
             Constant::Product(_, fields) => Some(fields.iter().map(|x| *x).collect()),
-            Constant::Zero(ty) => match types[ty.idx()] {
-                Type::Product(ref fields) => Some(
-                    fields
-                        .iter()
-                        .map(|field_ty| {
-                            let field_constant = Constant::Zero(*field_ty);
-                            if let Some(idx) = constants
-                                .iter()
-                                .position(|constant| *constant == field_constant)
-                            {
-                                ConstantID::new(idx)
-                            } else {
-                                let id = ConstantID::new(constants.len());
-                                constants.push(field_constant);
-                                id
-                            }
-                        })
-                        .collect(),
-                ),
-                _ => None,
-            },
             _ => None,
         }
     }
 
-    pub fn try_array_type(&self, types: &[Type]) -> Option<TypeID> {
-        // Need types, since zero initializer may be for a collection type, or
-        // not.
+    pub fn try_array_type(&self) -> Option<TypeID> {
         match self {
-            Constant::Array(ty, _) => Some(*ty),
-            Constant::Zero(ty) => {
-                if types[ty.idx()].is_array() {
-                    Some(*ty)
-                } else {
-                    None
-                }
-            }
+            Constant::Array(ty) => Some(*ty),
             _ => None,
         }
     }
 
-    pub fn try_product_type(&self, types: &[Type]) -> Option<TypeID> {
-        // Need types, since zero initializer may be for a collection type, or
-        // not.
+    pub fn try_product_type(&self) -> Option<TypeID> {
         match self {
             Constant::Product(ty, _) => Some(*ty),
-            Constant::Zero(ty) => {
-                if types[ty.idx()].is_product() {
-                    Some(*ty)
-                } else {
-                    None
-                }
-            }
             _ => None,
         }
     }
@@ -807,7 +732,6 @@ impl Constant {
             Constant::UnsignedInteger64(0) => true,
             Constant::Float32(ord) => *ord == ordered_float::OrderedFloat::<f32>(0.0),
             Constant::Float64(ord) => *ord == ordered_float::OrderedFloat::<f64>(0.0),
-            Constant::Zero(_) => true,
             _ => false,
         }
     }
@@ -838,6 +762,22 @@ impl DynamicConstant {
         }
     }
 
+    pub fn is_constant(&self) -> bool {
+        if let DynamicConstant::Constant(_) = self {
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn try_parameter(&self) -> Option<usize> {
+        if let DynamicConstant::Parameter(v) = self {
+            Some(*v)
+        } else {
+            None
+        }
+    }
+
     pub fn try_constant(&self) -> Option<usize> {
         if let DynamicConstant::Constant(v) = self {
             Some(*v)
@@ -863,6 +803,8 @@ macro_rules! define_pattern_predicate {
 }
 
 impl Index {
+    define_pattern_predicate!(is_field, Index::Field(_));
+    define_pattern_predicate!(is_position, Index::Position(_));
 
     pub fn try_field(&self) -> Option<usize> {
         if let Index::Field(field) = self {
@@ -989,6 +931,43 @@ impl Node {
         }
     }
 
+    pub fn try_fork(&self) -> Option<(NodeID, DynamicConstantID)> {
+        if let Node::Fork { control, factor } = self {
+            Some((*control, *factor))
+        } else {
+            None
+        }
+    }
+
+    pub fn try_thread_id(&self) -> Option<NodeID> {
+        if let Node::ThreadID { control } = self {
+            Some(*control)
+        } else {
+            None
+        }
+    }
+
+    pub fn try_join(&self) -> Option<NodeID> {
+        if let Node::Join { control } = self {
+            Some(*control)
+        } else {
+            None
+        }
+    }
+
+    pub fn try_reduce(&self) -> Option<(NodeID, NodeID, NodeID)> {
+        if let Node::Reduce {
+            control,
+            init,
+            reduct,
+        } = self
+        {
+            Some((*control, *init, *reduct))
+        } else {
+            None
+        }
+    }
+
     pub fn try_constant(&self) -> Option<ConstantID> {
         if let Node::Constant { id } = self {
             Some(*id)
@@ -1285,265 +1264,34 @@ macro_rules! define_id_type {
     };
 }
 
-define_id_type!(FunctionID);
-define_id_type!(NodeID);
-define_id_type!(TypeID);
-define_id_type!(ConstantID);
-define_id_type!(DynamicConstantID);
-
-/*
- * Sometimes, it's useful to debug print out a module. This code prints out a
- * module in (approximately) the same textual format as is parsed in parse.rs.
- */
-use std::fmt::Display;
-use std::fmt::Formatter;
-
-impl Display for Module {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        for func in self.functions.iter() {
-            func.ir_fmt(f, self)?;
-            write!(f, "\n")?;
-        }
-        Ok(())
-    }
-}
-
-/*
- * When printing out objects in a module, we may need to refer back (upwards) to
- * other objects in the module. Display doesn't let us do that, so we make our
- * own trait.
- */
-trait IRDisplay {
-    fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result;
-}
-
-impl IRDisplay for Function {
-    fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result {
-        write!(f, "fn {}<{}>(", self.name, self.num_dynamic_constants)?;
-
-        for (idx, typ) in self.param_types.iter().enumerate() {
-            write!(f, "arg_{} : ", idx)?;
-            module.write_type(*typ, f)?;
-            if idx + 1 < self.param_types.len() {
-                write!(f, ", ")?;
-            }
-        }
-
-        write!(f, ") -> ")?;
-        module.write_type(self.return_type, f)?;
-
-        write!(f, "\n")?;
-
-        for (idx, node) in self.nodes.iter().enumerate() {
-            write!(f, "\tvar_{} = ", idx)?;
-            node.ir_fmt(f, module)?;
-            write!(f, "\n")?;
-        }
-
-        Ok(())
-    }
-}
-
-impl IRDisplay for Node {
-    fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result {
-        match self {
-            Node::Start => {
-                write!(f, "start")
-            }
-            Node::Region { preds } => {
-                write!(f, "region(")?;
-                for (idx, pred) in preds.iter().enumerate() {
-                    write!(f, "var_{}", pred.0)?;
-                    if idx + 1 < preds.len() {
-                        write!(f, ", ")?;
-                    }
-                }
-                write!(f, ")")
-            }
-            Node::If { control, cond } => {
-                write!(f, "if(var_{}, var_{})", control.0, cond.0)
-            }
-            Node::Match { control, sum } => {
-                write!(f, "match(var_{}, var_{})", control.0, sum.0)
-            }
-            Node::Fork { control, factor } => {
-                write!(f, "fork(var_{}, ", control.0)?;
-                module.write_dynamic_constant(*factor, f)?;
-                write!(f, ")")
-            }
-            Node::Join { control } => {
-                write!(f, "join(var_{})", control.0)
-            }
-            Node::Phi { control, data } => {
-                write!(f, "phi(var_{}", control.0)?;
-                for val in data.iter() {
-                    write!(f, ", var_{}", val.0)?;
-                }
-                write!(f, ")")
-            }
-            Node::ThreadID { control } => {
-                write!(f, "thread_id(var_{})", control.0)
-            }
-            Node::Reduce {
-                control,
-                init,
-                reduct,
-            } => {
-                write!(
-                    f,
-                    "reduce(var_{}, var_{}, var_{})",
-                    control.0, init.0, reduct.0
-                )
-            }
-            Node::Return { control, data } => {
-                write!(f, "return(var_{}, var_{})", control.0, data.0)
-            }
-            Node::Parameter { index } => {
-                write!(f, "arg_{}", index)
-            }
-            Node::Constant { id } => {
-                write!(f, "constant(")?;
-                module.constants[id.idx()].ir_fmt(f, module)?;
-                write!(f, ")")
-            }
-            Node::DynamicConstant { id } => {
-                write!(f, "dynamic_constant(")?;
-                module.write_dynamic_constant(*id, f)?;
-                write!(f, ")")
-            }
-            Node::Unary { input, op } => {
-                write!(f, "{}(var_{})", op.lower_case_name(), input.0)
-            }
-            Node::Binary { left, right, op } => {
-                write!(
-                    f,
-                    "{}(var_{}, var_{})",
-                    op.lower_case_name(),
-                    left.0,
-                    right.0
-                )
-            }
-            Node::Call {
-                function,
-                dynamic_constants,
-                args,
-            } => {
-                write!(f, "call<")?;
-                for (idx, dyn_const) in dynamic_constants.iter().enumerate() {
-                    module.write_dynamic_constant(*dyn_const, f)?;
-                    if idx + 1 < dynamic_constants.len() {
-                        write!(f, ", ")?;
-                    }
-                }
-                write!(f, ">({}", module.functions[function.0 as usize].name)?;
-                for arg in args.iter() {
-                    write!(f, ", var_{}", arg.0)?;
-                }
-                write!(f, ")")
-            }
-            Node::Read { collect, indices } => {
-                write!(f, "read(var_{}", collect.0)?;
-                for idx in indices.iter() {
-                    write!(f, ", ")?;
-                    idx.ir_fmt(f, module)?;
-                }
-                write!(f, ")")
-            }
-            Node::Write {
-                collect,
-                data,
-                indices,
-            } => {
-                write!(f, "write(var_{}, var_{}", collect.0, data.0)?;
-                for idx in indices.iter() {
-                    write!(f, ", ")?;
-                    idx.ir_fmt(f, module)?;
-                }
-                write!(f, ")")
-            }
-            Node::Ternary {
-                first,
-                second,
-                third,
-                op,
-            } => {
-                write!(
-                    f,
-                    "{}(var_{}, var_{}, var_{})",
-                    op.lower_case_name(),
-                    first.0,
-                    second.0,
-                    third.0
-                )
-            }
-            Node::Projection { control, selection } => {
-                write!(f, "projection({}, {})", control.0, selection)
-            }
-        }
-    }
-}
+#[macro_export]
+macro_rules! define_dual_id_type {
+    ($x: ident) => {
+        #[derive(
+            Debug,
+            Default,
+            Clone,
+            Copy,
+            PartialEq,
+            Eq,
+            Hash,
+            PartialOrd,
+            Ord,
+            serde::Serialize,
+            serde::Deserialize,
+        )]
+        pub struct $x(u32, u32);
 
-impl IRDisplay for Index {
-    fn ir_fmt(&self, f: &mut Formatter<'_>, _module: &Module) -> std::fmt::Result {
-        match self {
-            Index::Field(idx) => write!(f, "field({})", idx),
-            Index::Variant(idx) => write!(f, "variant({})", idx),
-            Index::Position(indices) => {
-                write!(f, "position(")?;
-                for (i, idx) in indices.iter().enumerate() {
-                    write!(f, "var_{}", idx.0)?;
-                    if i + 1 < indices.len() {
-                        write!(f, ", ")?;
-                    }
-                }
-                write!(f, ")")
+        impl $x {
+            pub fn new(x: usize, y: usize) -> Self {
+                $x(x as u32, y as u32)
             }
         }
-    }
+    };
 }
 
-impl IRDisplay for Constant {
-    fn ir_fmt(&self, f: &mut Formatter<'_>, module: &Module) -> std::fmt::Result {
-        match self {
-            Constant::Boolean(v) => write!(f, "{} : bool", v),
-            Constant::Integer8(v) => write!(f, "{} : i8", v),
-            Constant::Integer16(v) => write!(f, "{} : i16", v),
-            Constant::Integer32(v) => write!(f, "{} : i32", v),
-            Constant::Integer64(v) => write!(f, "{} : i64", v),
-            Constant::UnsignedInteger8(v) => write!(f, "{} : u8", v),
-            Constant::UnsignedInteger16(v) => write!(f, "{} : u16", v),
-            Constant::UnsignedInteger32(v) => write!(f, "{} : u32", v),
-            Constant::UnsignedInteger64(v) => write!(f, "{} : u64", v),
-            Constant::Float32(v) => write!(f, "{} : f32", v),
-            Constant::Float64(v) => write!(f, "{} : f64", v),
-            Constant::Product(t, cnsts) => {
-                write!(f, "(")?;
-                for i in 0..cnsts.len() {
-                    module.constants[cnsts[i].idx()].ir_fmt(f, module)?;
-                    write!(f, ", ")?;
-                }
-                write!(f, ") :")?;
-                module.write_type(*t, f)
-            }
-            Constant::Summation(t, tag, cnst) => {
-                write!(f, "{}(", tag)?;
-                module.constants[cnst.idx()].ir_fmt(f, module)?;
-                write!(f, ") : ")?;
-                module.write_type(*t, f)
-            }
-            Constant::Array(t, cnsts) => {
-                write!(f, "{{")?;
-                for i in 0..cnsts.len() {
-                    module.constants[cnsts[i].idx()].ir_fmt(f, module)?;
-                    write!(f, ", ")?;
-                }
-                write!(f, "}} : ")?;
-                module.write_type(*t, f)
-            }
-            Constant::Zero(t) => {
-                write!(f, "zero : ")?;
-                module.write_type(*t, f)
-            }
-        }
-    }
-}
+define_id_type!(FunctionID);
+define_id_type!(NodeID);
+define_id_type!(TypeID);
+define_id_type!(ConstantID);
+define_id_type!(DynamicConstantID);
diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs
index fc5e397bf4a6848d6e91969779f90906dbaa2021..70eb270ca7d02827b1ef5f21b739ea643d4836b8 100644
--- a/hercules_ir/src/parse.rs
+++ b/hercules_ir/src/parse.rs
@@ -221,7 +221,7 @@ fn parse_function<'a>(
     let (ir_text, return_type) = parse_type_id(ir_text, context)?;
     let (ir_text, nodes) = nom::multi::many1(|x| parse_node(x, context))(ir_text)?;
 
-    // nodes, as returned by parsing, is in parse order, which may differ from
+    // `nodes`, as returned by parsing, is in parse order, which may differ from
     // the order dictated by NodeIDs in the node name intern map.
     let mut fixed_nodes = vec![Node::Start; context.borrow().node_ids.len()];
     for (name, node) in nodes {
@@ -270,7 +270,18 @@ fn parse_node<'a>(
     ir_text: &'a str,
     context: &RefCell<Context<'a>>,
 ) -> nom::IResult<&'a str, (&'a str, Node)> {
-    let ir_text = nom::character::complete::multispace0(ir_text)?.0;
+    let mut ir_text = nom::character::complete::multispace0(ir_text)?.0;
+    if let Ok((comment_ir_text, _)) =
+        nom::character::complete::char::<&'a str, (&'a str, _)>('#')(ir_text)
+    {
+        let comment_ir_text =
+            nom::bytes::complete::take_while(|c| !nom::character::is_newline(c as u8))(
+                comment_ir_text,
+            )?
+            .0;
+        let comment_ir_text = nom::character::complete::line_ending(comment_ir_text)?.0;
+        ir_text = nom::character::complete::multispace0(comment_ir_text)?.0;
+    }
     let (ir_text, node_name) = parse_identifier(ir_text)?;
     let ir_text = nom::character::complete::multispace0(ir_text)?.0;
     let ir_text = nom::character::complete::char('=')(ir_text)?.0;
@@ -718,7 +729,6 @@ fn parse_match<'a>(
 }
 
 fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Type> {
-    // Parser combinators are very convenient, if a bit hard to read.
     let ir_text = nom::character::complete::multispace0(ir_text)?.0;
     let (ir_text, ty) = nom::branch::alt((
         // Control tokens are parameterized by a list of dynamic constants
@@ -893,15 +903,6 @@ fn parse_constant<'a>(
     ty: Type,
     context: &RefCell<Context<'a>>,
 ) -> nom::IResult<&'a str, Constant> {
-    let ty_id = context.borrow_mut().get_type_id(ty.clone());
-    let (ir_text, maybe_constant) = nom::combinator::opt(nom::combinator::map(
-        nom::bytes::complete::tag("zero"),
-        |_| Constant::Zero(ty_id),
-    ))(ir_text)?;
-    if let Some(cons) = maybe_constant {
-        return Ok((ir_text, cons));
-    }
-
     let (ir_text, constant) = match ty {
         // There are not control constants.
         Type::Control(_) => Err(nom::Err::Error(nom::error::Error {
@@ -931,12 +932,9 @@ fn parse_constant<'a>(
             tys,
             context,
         )?,
-        Type::Array(elem_ty, _) => parse_array_constant(
-            ir_text,
-            context.borrow_mut().get_type_id(ty.clone()),
-            elem_ty,
-            context,
-        )?,
+        Type::Array(_, _) => {
+            parse_array_constant(ir_text, context.borrow_mut().get_type_id(ty.clone()))?
+        }
     };
     Ok((ir_text, constant))
 }
@@ -1086,42 +1084,12 @@ fn parse_summation_constant<'a>(
     Ok((ir_text, Constant::Summation(sum_ty, variant, id)))
 }
 
-fn parse_array_constant<'a>(
-    ir_text: &'a str,
-    array_ty: TypeID,
-    elem_ty: TypeID,
-    context: &RefCell<Context<'a>>,
-) -> nom::IResult<&'a str, Constant> {
+fn parse_array_constant<'a>(ir_text: &'a str, array_ty: TypeID) -> nom::IResult<&'a str, Constant> {
     let ir_text = nom::character::complete::multispace0(ir_text)?.0;
     let ir_text = nom::character::complete::char('[')(ir_text)?.0;
     let ir_text = nom::character::complete::multispace0(ir_text)?.0;
-    let (ir_text, entries) = nom::multi::separated_list1(
-        nom::sequence::tuple((
-            nom::character::complete::multispace0,
-            nom::character::complete::char(','),
-            nom::character::complete::multispace0,
-        )),
-        |x| {
-            parse_constant_id(
-                x,
-                context
-                    .borrow()
-                    .reverse_type_map
-                    .get(&elem_ty)
-                    .unwrap()
-                    .clone(),
-                context,
-            )
-        },
-    )(ir_text)?;
-    let ir_text = nom::character::complete::multispace0(ir_text)?.0;
     let ir_text = nom::character::complete::char(']')(ir_text)?.0;
-
-    // Will check that entries is the correct size during typechecking.
-    Ok((
-        ir_text,
-        Constant::Array(array_ty, entries.into_boxed_slice()),
-    ))
+    Ok((ir_text, Constant::Array(array_ty)))
 }
 
 fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> {
diff --git a/hercules_ir/src/schedule.rs b/hercules_ir/src/schedule.rs
index 3a240a250d8a94cd5190b6cb2f1873f7ca5cd2a4..fb839b5c4a469e8f6a9b8a718f361f959e0c85e7 100644
--- a/hercules_ir/src/schedule.rs
+++ b/hercules_ir/src/schedule.rs
@@ -171,6 +171,274 @@ impl Plan {
             num_partitions,
         }
     }
+
+    /*
+     * Verify that a partitioning is valid.
+     */
+    pub fn verify_partitioning(
+        &self,
+        function: &Function,
+        def_use: &ImmutableDefUseMap,
+        fork_join_map: &HashMap<NodeID, NodeID>,
+    ) {
+        let partition_to_node_ids = self.invert_partition_map();
+
+        // First, verify that there is at most one control node in the partition
+        // with a control use outside the partition. A partition may only have
+        // zero such control nodes if it contains the start node. This also
+        // checks that each partition has at least one control node.
+        for nodes_in_partition in partition_to_node_ids.iter() {
+            let contains_start = nodes_in_partition
+                .iter()
+                .any(|id| function.nodes[id.idx()] == Node::Start);
+            let num_inter_partition_control_uses = nodes_in_partition
+                .iter()
+                .filter(|id| {
+                    // An inter-partition control use is a control node,
+                    function.nodes[id.idx()].is_control()
+                        // where one of its uses,
+                        && get_uses(&function.nodes[id.idx()])
+                            .as_ref()
+                            .into_iter()
+                            .any(|use_id| {
+                                // that is itself a control node as well,
+                                function.nodes[use_id.idx()].is_control()
+                                    // is in a different partition.
+                                    && self.partitions[use_id.idx()] != self.partitions[id.idx()]
+                            })
+                })
+                .count();
+
+            assert!(num_inter_partition_control_uses + contains_start as usize == 1, "PANIC: Found an invalid partition based on the inter-partition control use criteria.");
+        }
+
+        // Second, verify that fork-joins are not split amongst partitions.
+        for id in (0..function.nodes.len()).map(NodeID::new) {
+            if function.nodes[id.idx()].is_fork() {
+                let fork_part = self.partitions[id.idx()];
+
+                // The join must be in the same partition.
+                let join = fork_join_map[&id];
+                assert_eq!(
+                    fork_part,
+                    self.partitions[join.idx()],
+                    "PANIC: Join is in a different partition than its corresponding fork."
+                );
+
+                // The thread IDs must be in the same partition.
+                def_use
+                    .get_users(id)
+                    .into_iter()
+                    .filter(|user| function.nodes[user.idx()].is_thread_id())
+                    .for_each(|thread_id| {
+                        assert_eq!(
+                            fork_part,
+                            self.partitions[thread_id.idx()],
+                            "PANIC: Thread ID is in a different partition than its fork use."
+                        )
+                    });
+
+                // The reduces must be in the same partition.
+                def_use
+                    .get_users(join)
+                    .into_iter()
+                    .filter(|user| function.nodes[user.idx()].is_reduce())
+                    .for_each(|reduce| {
+                        assert_eq!(
+                            fork_part,
+                            self.partitions[reduce.idx()],
+                            "PANIC: Reduce is in a different partition than its join use."
+                        )
+                    });
+            }
+        }
+
+        // Third, verify that every data node has proper dominance relations
+        // with respect to the partitioning. In particular:
+        // 1. Every non-phi data node should be in a partition that is dominated
+        //    by the partitions of every one of its uses.
+        // 2. Every data node should be in a partition that dominates the
+        //    partitions of every one of its non-phi users.
+        // Compute a dominance relation between the partitions by constructing a
+        // partition control graph.
+        let partition_graph = partition_graph(function, def_use, self);
+        let dom = dominator(&partition_graph, NodeID::new(self.partitions[0].idx()));
+        for id in (0..function.nodes.len()).map(NodeID::new) {
+            let part = self.partitions[id.idx()];
+
+            // Check condition #1.
+            if !function.nodes[id.idx()].is_phi() {
+                let uses = get_uses(&function.nodes[id.idx()]);
+                for use_id in uses.as_ref() {
+                    let use_part = self.partitions[use_id.idx()];
+                    assert!(dom.does_dom(NodeID::new(use_part.idx()), NodeID::new(part.idx())), "PANIC: A data node has a partition use that doesn't dominate its partition.");
+                }
+            }
+
+            // Check condition #2.
+            let users = def_use.get_users(id);
+            for user_id in users.as_ref() {
+                if !function.nodes[user_id.idx()].is_phi() {
+                    let user_part = self.partitions[user_id.idx()];
+                    assert!(dom.does_dom(NodeID::new(part.idx()), NodeID::new(user_part.idx())), "PANIC: A data node has a partition user that isn't dominated by its partition.");
+                }
+            }
+        }
+
+        // Fourth, verify that every partition has at least one partition
+        // successor xor has at least one return node.
+        for partition_idx in 0..self.num_partitions {
+            let has_successor = partition_graph.succs(NodeID::new(partition_idx)).count() > 0;
+            let has_return = partition_to_node_ids[partition_idx]
+                .iter()
+                .any(|node_id| function.nodes[node_id.idx()].is_return());
+            assert!(has_successor ^ has_return, "PANIC: Found an invalid partition based on the partition return / control criteria.");
+        }
+    }
+
+    /*
+     * Compute the top node for each partition.
+     */
+    pub fn compute_top_nodes(
+        &self,
+        function: &Function,
+        control_subgraph: &Subgraph,
+        inverted_partition_map: &Vec<Vec<NodeID>>,
+    ) -> Vec<NodeID> {
+        inverted_partition_map
+            .into_iter()
+            .enumerate()
+            .map(|(part_idx, part)| {
+                // For each partition, find the "top" node.
+                *part
+                    .iter()
+                    .filter(move |id| {
+                        // The "top" node is a control node having at least one
+                        // control predecessor in another partition, or is a
+                        // start node. Every predecessor in the control subgraph
+                        // is a control node.
+                        function.nodes[id.idx()].is_start()
+                            || (function.nodes[id.idx()].is_control()
+                                && control_subgraph
+                                    .preds(**id)
+                                    .filter(|pred_id| {
+                                        self.partitions[pred_id.idx()].idx() != part_idx
+                                    })
+                                    .count()
+                                    > 0)
+                    })
+                    // We assume here there is exactly one such top node per
+                    // partition. Verify a partitioning with
+                    // `verify_partitioning` before calling this method.
+                    .next()
+                    .unwrap()
+            })
+            .collect()
+    }
+
+    /*
+     * Compute the data inputs of each partition.
+     */
+    pub fn compute_data_inputs(&self, function: &Function) -> Vec<Vec<NodeID>> {
+        let mut data_inputs = vec![vec![]; self.num_partitions];
+
+        // First consider the non-phi nodes in each partition.
+        for id in (0..function.nodes.len()).map(NodeID::new) {
+            if function.nodes[id.idx()].is_phi() {
+                continue;
+            }
+
+            let data_inputs = &mut data_inputs[self.partitions[id.idx()].idx()];
+            let uses = get_uses(&function.nodes[id.idx()]);
+            for use_id in uses.as_ref() {
+                // For every non-phi node, check each of its data uses. If the
+                // node and its use are in different partitions, then the use is
+                // a data input for the partition of the node. Also, don't add
+                // the same node to the data inputs list twice.
+                if !function.nodes[use_id.idx()].is_control()
+                    && self.partitions[id.idx()] != self.partitions[use_id.idx()]
+                    && !data_inputs.contains(use_id)
+                {
+                    data_inputs.push(*use_id);
+                }
+            }
+        }
+
+        // Second consider the phi nodes in each partition.
+        for id in (0..function.nodes.len()).map(NodeID::new) {
+            if !function.nodes[id.idx()].is_phi() {
+                continue;
+            }
+
+            let data_inputs = &mut data_inputs[self.partitions[id.idx()].idx()];
+            let uses = get_uses(&function.nodes[id.idx()]);
+            for use_id in uses.as_ref() {
+                // For every phi node, if any one of its uses is defined in a
+                // different partition, then the phi node itself, not its
+                // outside uses, is considered a data input. This is because a
+                // phi node whose uses are all in a different partition should
+                // be lowered to a single parameter to the corresponding simple
+                // IR function. Note that for a phi node with some uses outside
+                // and some uses inside the partition, the uses outside the
+                // partition become a single parameter to the simple IR
+                // function, and that parameter and all of the "inside" uses
+                // become the inputs to a phi inside the simple IR function.
+                if self.partitions[id.idx()] != self.partitions[use_id.idx()]
+                    && !data_inputs.contains(&id)
+                {
+                    data_inputs.push(id);
+                    break;
+                }
+            }
+        }
+
+        // Sort the node IDs to keep a consistent interface between partitions.
+        for data_inputs in &mut data_inputs {
+            data_inputs.sort();
+        }
+        data_inputs
+    }
+
+    /*
+     * Compute the data outputs of each partition.
+     */
+    pub fn compute_data_outputs(
+        &self,
+        function: &Function,
+        def_use: &ImmutableDefUseMap,
+    ) -> Vec<Vec<NodeID>> {
+        let mut data_outputs = vec![vec![]; self.num_partitions];
+
+        for id in (0..function.nodes.len()).map(NodeID::new) {
+            if function.nodes[id.idx()].is_control() {
+                continue;
+            }
+
+            let data_outputs = &mut data_outputs[self.partitions[id.idx()].idx()];
+            let users = def_use.get_users(id);
+            for user_id in users.as_ref() {
+                // For every data node, check each of its users. If the node and
+                // its user are in different partitions, then the node is a data
+                // output for the partition of the node. Also, don't add the
+                // same node to the data outputs list twice. It doesn't matter
+                // how this data node is being used - all that matters is that
+                // it itself is a data node, and that it has a user outside the
+                // partition. This makes the code simpler than the inputs case.
+                if self.partitions[id.idx()] != self.partitions[user_id.idx()]
+                    && !data_outputs.contains(&id)
+                {
+                    data_outputs.push(id);
+                    break;
+                }
+            }
+        }
+
+        // Sort the node IDs to keep a consistent interface between partitions.
+        for data_outputs in &mut data_outputs {
+            data_outputs.sort();
+        }
+        data_outputs
+    }
 }
 
 /*
diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs
index 6d76f6fcc6b63cc97bfaf468f551836339a89656..62ef123e7528cf5526de59017cb31503113456c5 100644
--- a/hercules_ir/src/subgraph.rs
+++ b/hercules_ir/src/subgraph.rs
@@ -4,7 +4,7 @@ use std::collections::HashMap;
 
 /*
  * In various parts of the compiler, we want to consider a subset of a complete
- * function graph. For example, for dominators, we often only want to find the
+ * function graph. For example, for dominators, we often want to find the
  * dominator tree of only the control subgraph.
  */
 #[derive(Debug, Clone)]
@@ -245,3 +245,72 @@ pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Su
         function.nodes[node.idx()].is_control()
     })
 }
+
+/*
+ * Construct a subgraph representing the control relations between partitions.
+ * Technically, this isn't a "sub"graph of the function graph, since partition
+ * nodes don't correspond to nodes in the original function.
+ */
+pub fn partition_graph(function: &Function, def_use: &ImmutableDefUseMap, plan: &Plan) -> Subgraph {
+    let partition_to_node_ids = plan.invert_partition_map();
+
+    let mut subgraph = Subgraph {
+        nodes: (0..plan.num_partitions).map(NodeID::new).collect(),
+        node_numbers: (0..plan.num_partitions)
+            .map(|idx| (NodeID::new(idx), idx as u32))
+            .collect(),
+        first_forward_edges: vec![],
+        forward_edges: vec![],
+        first_backward_edges: vec![],
+        backward_edges: vec![],
+        original_num_nodes: plan.num_partitions as u32,
+    };
+
+    // Step 1: collect backward edges from use info.
+    for partition in partition_to_node_ids.iter() {
+        // Record the source of the edges (the current partition).
+        let old_num_edges = subgraph.backward_edges.len();
+        subgraph.first_backward_edges.push(old_num_edges as u32);
+        for node in partition.iter() {
+            // Look at all the uses from nodes in that partition.
+            let uses = get_uses(&function.nodes[node.idx()]);
+            for use_id in uses.as_ref() {
+                // Add a backward edge to any different partition we are using
+                // and don't add duplicate backward edges.
+                if plan.partitions[use_id.idx()] != plan.partitions[node.idx()]
+                    && !subgraph.backward_edges[old_num_edges..]
+                        .contains(&(plan.partitions[use_id.idx()].idx() as u32))
+                {
+                    subgraph
+                        .backward_edges
+                        .push(plan.partitions[use_id.idx()].idx() as u32);
+                }
+            }
+        }
+    }
+
+    // Step 2: collect forward edges from user (def_use) info.
+    for partition in partition_to_node_ids.iter() {
+        // Record the source of the edges (the current partition).
+        let old_num_edges = subgraph.forward_edges.len();
+        subgraph.first_forward_edges.push(old_num_edges as u32);
+        for node in partition.iter() {
+            // Look at all the users of nodes in that partition.
+            let users = def_use.get_users(*node);
+            for user_id in users.as_ref() {
+                // Add a forward edge to any different partition containing a
+                // user of our nodes and don't add duplicate forward edges.
+                if plan.partitions[user_id.idx()] != plan.partitions[node.idx()]
+                    && !subgraph.forward_edges[old_num_edges..]
+                        .contains(&(plan.partitions[user_id.idx()].idx() as u32))
+                {
+                    subgraph
+                        .forward_edges
+                        .push(plan.partitions[user_id.idx()].idx() as u32);
+                }
+            }
+        }
+    }
+
+    subgraph
+}
diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs
index f2e4dbaa9f85123eecaa7972181ea775fab2c74b..140edfe02a4efb7be355ddcf06d75512adc71bd3 100644
--- a/hercules_ir/src/typecheck.rs
+++ b/hercules_ir/src/typecheck.rs
@@ -557,21 +557,8 @@ fn typeflow(
                     }
                 }
                 // Array typechecking also consists of validating the number of constant elements.
-                Constant::Array(id, ref elems) => {
-                    if let Type::Array(_, dc_ids) = &types[id.idx()] {
-                        let mut total_num_elems = 1;
-                        for dc_id in dc_ids.iter() {
-                            total_num_elems *= if let DynamicConstant::Constant(extent) =
-                                dynamic_constants[dc_id.idx()]
-                            {
-                                extent
-                            } else {
-                                return Error(String::from("Array constant type must reference only constant valued dynamic constants."));
-                            };
-                        }
-                        if total_num_elems != 1 && total_num_elems != elems.len() {
-                            return Error(String::from("Array constant must have a compatible amount of elements as the extent of the array."));
-                        }
+                Constant::Array(id) => {
+                    if let Type::Array(_, _) = &types[id.idx()] {
                         Concrete(id)
                     } else {
                         Error(String::from(
@@ -579,8 +566,6 @@ fn typeflow(
                         ))
                     }
                 }
-                // Zero constants need to store their type, and we trust it.
-                Constant::Zero(id) => Concrete(id),
             }
         }
         Node::DynamicConstant { id } => {
diff --git a/hercules_opt/src/ccp.rs b/hercules_opt/src/ccp.rs
index 402e24757a014e5ace86ec6414f32d9fc7eacf23..a3506948ee32db45e30d50bae10d4a257cf8b2bc 100644
--- a/hercules_opt/src/ccp.rs
+++ b/hercules_opt/src/ccp.rs
@@ -456,7 +456,6 @@ fn ccp_flow_function(
                     (UnaryOperator::Neg, Constant::Integer64(val)) => ConstantLattice::Constant(Constant::Integer64(-val)),
                     (UnaryOperator::Neg, Constant::Float32(val)) => ConstantLattice::Constant(Constant::Float32(-val)),
                     (UnaryOperator::Neg, Constant::Float64(val)) => ConstantLattice::Constant(Constant::Float64(-val)),
-                    (UnaryOperator::Neg, Constant::Zero(id)) => ConstantLattice::Constant(Constant::Zero(*id)),
                     (UnaryOperator::Cast(_), _) => ConstantLattice::Bottom,
                     _ => panic!("Unsupported combination of unary operation and constant value. Did typechecking succeed?")
                 }
@@ -485,32 +484,6 @@ fn ccp_flow_function(
                 ConstantLattice::Constant(right_cons),
             ) = (left_constant, right_constant)
             {
-                let type_to_zero_cons = |ty_id: TypeID| {
-                    match types[ty_id.idx()] {
-                    Type::Boolean => Constant::Boolean(false),
-                    Type::Integer8 => Constant::Integer8(0),
-                    Type::Integer16 => Constant::Integer16(0),
-                    Type::Integer32 => Constant::Integer32(0),
-                    Type::Integer64 => Constant::Integer64(0),
-                    Type::UnsignedInteger8 => Constant::UnsignedInteger8(0),
-                    Type::UnsignedInteger16 => Constant::UnsignedInteger16(0),
-                    Type::UnsignedInteger32 => Constant::UnsignedInteger32(0),
-                    Type::UnsignedInteger64 => Constant::UnsignedInteger64(0),
-                    Type::Float32 => Constant::Float32(ordered_float::OrderedFloat::<f32>(0.0)),
-                    Type::Float64 => Constant::Float64(ordered_float::OrderedFloat::<f64>(0.0)),
-                    _ => panic!("Unsupported combination of binary operation and constant values. Did typechecking succeed?")
-                }
-                };
-                let left_cons = if let Constant::Zero(id) = left_cons {
-                    type_to_zero_cons(*id)
-                } else {
-                    left_cons.clone()
-                };
-                let right_cons = if let Constant::Zero(id) = right_cons {
-                    type_to_zero_cons(*id)
-                } else {
-                    right_cons.clone()
-                };
                 let new_cons = match (op, left_cons, right_cons) {
                     (BinaryOperator::Add, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val + right_val),
                     (BinaryOperator::Add, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val + right_val),
@@ -520,8 +493,8 @@ fn ccp_flow_function(
                     (BinaryOperator::Add, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val + right_val),
                     (BinaryOperator::Add, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val + right_val),
                     (BinaryOperator::Add, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val + right_val),
-                    (BinaryOperator::Add, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val + right_val),
-                    (BinaryOperator::Add, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val + right_val),
+                    (BinaryOperator::Add, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val + *right_val),
+                    (BinaryOperator::Add, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val + *right_val),
                     (BinaryOperator::Sub, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val - right_val),
                     (BinaryOperator::Sub, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val - right_val),
                     (BinaryOperator::Sub, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val - right_val),
@@ -530,8 +503,8 @@ fn ccp_flow_function(
                     (BinaryOperator::Sub, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val - right_val),
                     (BinaryOperator::Sub, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val - right_val),
                     (BinaryOperator::Sub, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val - right_val),
-                    (BinaryOperator::Sub, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val - right_val),
-                    (BinaryOperator::Sub, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val - right_val),
+                    (BinaryOperator::Sub, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val - *right_val),
+                    (BinaryOperator::Sub, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val - *right_val),
                     (BinaryOperator::Mul, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val * right_val),
                     (BinaryOperator::Mul, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val * right_val),
                     (BinaryOperator::Mul, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val * right_val),
@@ -540,8 +513,8 @@ fn ccp_flow_function(
                     (BinaryOperator::Mul, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val * right_val),
                     (BinaryOperator::Mul, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val * right_val),
                     (BinaryOperator::Mul, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val * right_val),
-                    (BinaryOperator::Mul, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val * right_val),
-                    (BinaryOperator::Mul, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val * right_val),
+                    (BinaryOperator::Mul, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val * *right_val),
+                    (BinaryOperator::Mul, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val * *right_val),
                     (BinaryOperator::Div, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val / right_val),
                     (BinaryOperator::Div, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val / right_val),
                     (BinaryOperator::Div, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val / right_val),
@@ -550,8 +523,8 @@ fn ccp_flow_function(
                     (BinaryOperator::Div, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val / right_val),
                     (BinaryOperator::Div, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val / right_val),
                     (BinaryOperator::Div, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val / right_val),
-                    (BinaryOperator::Div, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val / right_val),
-                    (BinaryOperator::Div, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val / right_val),
+                    (BinaryOperator::Div, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val / *right_val),
+                    (BinaryOperator::Div, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val / *right_val),
                     (BinaryOperator::Rem, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val % right_val),
                     (BinaryOperator::Rem, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val % right_val),
                     (BinaryOperator::Rem, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val % right_val),
@@ -560,8 +533,8 @@ fn ccp_flow_function(
                     (BinaryOperator::Rem, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val % right_val),
                     (BinaryOperator::Rem, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val % right_val),
                     (BinaryOperator::Rem, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val % right_val),
-                    (BinaryOperator::Rem, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(left_val % right_val),
-                    (BinaryOperator::Rem, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(left_val % right_val),
+                    (BinaryOperator::Rem, Constant::Float32(left_val), Constant::Float32(right_val)) => Constant::Float32(*left_val % *right_val),
+                    (BinaryOperator::Rem, Constant::Float64(left_val), Constant::Float64(right_val)) => Constant::Float64(*left_val % *right_val),
                     (BinaryOperator::LT, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Boolean(left_val < right_val),
                     (BinaryOperator::LT, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Boolean(left_val < right_val),
                     (BinaryOperator::LT, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Boolean(left_val < right_val),
@@ -606,7 +579,7 @@ fn ccp_flow_function(
                     // need to unpack the constants.
                     (BinaryOperator::EQ, left_val, right_val) => Constant::Boolean(left_val == right_val),
                     (BinaryOperator::NE, left_val, right_val) => Constant::Boolean(left_val != right_val),
-                    (BinaryOperator::Or, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(left_val || right_val),
+                    (BinaryOperator::Or, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(*left_val || *right_val),
                     (BinaryOperator::Or, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val | right_val),
                     (BinaryOperator::Or, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val | right_val),
                     (BinaryOperator::Or, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val | right_val),
@@ -615,7 +588,7 @@ fn ccp_flow_function(
                     (BinaryOperator::Or, Constant::UnsignedInteger16(left_val), Constant::UnsignedInteger16(right_val)) => Constant::UnsignedInteger16(left_val | right_val),
                     (BinaryOperator::Or, Constant::UnsignedInteger32(left_val), Constant::UnsignedInteger32(right_val)) => Constant::UnsignedInteger32(left_val | right_val),
                     (BinaryOperator::Or, Constant::UnsignedInteger64(left_val), Constant::UnsignedInteger64(right_val)) => Constant::UnsignedInteger64(left_val | right_val),
-                    (BinaryOperator::And, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(left_val && right_val),
+                    (BinaryOperator::And, Constant::Boolean(left_val), Constant::Boolean(right_val)) => Constant::Boolean(*left_val && *right_val),
                     (BinaryOperator::And, Constant::Integer8(left_val), Constant::Integer8(right_val)) => Constant::Integer8(left_val & right_val),
                     (BinaryOperator::And, Constant::Integer16(left_val), Constant::Integer16(right_val)) => Constant::Integer16(left_val & right_val),
                     (BinaryOperator::And, Constant::Integer32(left_val), Constant::Integer32(right_val)) => Constant::Integer32(left_val & right_val),
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index 58342c5e6107df626cca8f5f240f4d7e76808c05..641b46ab5dcd957f1f9d330e8044a120c5b72b83 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -1,18 +1,13 @@
-// extern crate hercules_cg;
 extern crate hercules_ir;
 extern crate postcard;
 extern crate serde;
 extern crate take_mut;
 
 use std::collections::HashMap;
-use std::fs::File;
-use std::io::prelude::*;
 use std::iter::zip;
-use std::process::*;
 
 use self::serde::Deserialize;
 
-// use self::hercules_cg::*;
 use self::hercules_ir::*;
 
 use crate::*;
@@ -61,6 +56,7 @@ pub struct PassManager {
     pub loops: Option<Vec<LoopTree>>,
     pub antideps: Option<Vec<Vec<(NodeID, NodeID)>>>,
     pub bbs: Option<Vec<Vec<NodeID>>>,
+    pub fork_join_placements: Option<Vec<Vec<ForkJoinPlacement>>>,
 
     // Current plan. Keep track of the last time the plan was updated.
     pub plans: Option<Vec<Plan>>,
@@ -83,6 +79,7 @@ impl PassManager {
             antideps: None,
             bbs: None,
             plans: None,
+            fork_join_placements: None,
         }
     }
 
@@ -254,6 +251,27 @@ impl PassManager {
         }
     }
 
+    pub fn make_fork_join_placements(&mut self) {
+        if self.fork_join_placements.is_none() {
+            self.make_fork_join_maps();
+            self.make_fork_join_nests();
+            self.make_bbs();
+            let fork_join_maps = self.fork_join_maps.as_ref().unwrap().iter();
+            let fork_join_nests = self.fork_join_nests.as_ref().unwrap().iter();
+            let bbs = self.bbs.as_ref().unwrap().iter();
+            self.fork_join_placements = Some(
+                zip(
+                    self.module.functions.iter(),
+                    zip(fork_join_maps, zip(fork_join_nests, bbs)),
+                )
+                .map(|(function, (fork_join_map, (fork_join_nest, bb)))| {
+                    compute_fork_join_placement(function, fork_join_map, fork_join_nest, bb)
+                })
+                .collect(),
+            );
+        }
+    }
+
     pub fn make_plans(&mut self) {
         if self.plans.is_none() {
             self.make_reverse_postorders();
@@ -412,6 +430,17 @@ impl PassManager {
                     self.postdoms = Some(postdoms);
                     self.fork_join_maps = Some(fork_join_maps);
 
+                    // Verify the plan, if it exists.
+                    if let Some(plans) = &self.plans {
+                        for idx in 0..self.module.functions.len() {
+                            plans[idx].verify_partitioning(
+                                &self.module.functions[idx],
+                                &self.def_uses.as_ref().unwrap()[idx],
+                                &self.fork_join_maps.as_ref().unwrap()[idx],
+                            );
+                        }
+                    }
+
                     // Verify doesn't require clearing analysis results.
                     continue;
                 }
@@ -420,21 +449,24 @@ impl PassManager {
                     if *force_analyses {
                         self.make_doms();
                         self.make_fork_join_maps();
+                        self.make_bbs();
                         self.make_plans();
+                        self.make_fork_join_placements();
                     }
                     xdot_module(
                         &self.module,
                         self.reverse_postorders.as_ref().unwrap(),
                         self.doms.as_ref(),
                         self.fork_join_maps.as_ref(),
+                        self.bbs.as_ref(),
                         self.plans.as_ref(),
+                        self.fork_join_placements.as_ref(),
                     );
 
                     // Xdot doesn't require clearing analysis results.
                     continue;
                 }
                 Pass::Codegen(output_file_name) => {
-                    /* 
                     self.make_def_uses();
                     self.make_reverse_postorders();
                     self.make_typing();
@@ -444,50 +476,51 @@ impl PassManager {
                     self.make_antideps();
                     self.make_bbs();
                     self.make_plans();
-
-                    let mut llvm_ir = String::new();
-                    let manifest = codegen(
-                        &self.module,
-                        self.def_uses.as_ref().unwrap(),
-                        self.reverse_postorders.as_ref().unwrap(),
-                        self.typing.as_ref().unwrap(),
-                        self.control_subgraphs.as_ref().unwrap(),
-                        self.fork_join_maps.as_ref().unwrap(),
-                        self.fork_join_nests.as_ref().unwrap(),
-                        self.antideps.as_ref().unwrap(),
-                        self.bbs.as_ref().unwrap(),
-                        self.plans.as_ref().unwrap(),
-                        &mut llvm_ir,
-                    )
-                    .unwrap();
-
-                    // Compile LLVM IR into ELF object.
-                    let llc_process = Command::new("llc")
-                        .arg("-filetype=obj")
-                        .arg("-O3")
-                        .stdin(Stdio::piped())
-                        .stdout(Stdio::piped())
-                        .spawn()
-                        .unwrap();
-                    llc_process
-                        .stdin
-                        .as_ref()
-                        .unwrap()
-                        .write(llvm_ir.as_bytes())
-                        .unwrap();
-                    let elf_object = llc_process.wait_with_output().unwrap().stdout;
-
-                    // Package manifest and ELF object into the same file.
-                    let hbin_module = (manifest, elf_object);
-                    let hbin_contents: Vec<u8> = postcard::to_allocvec(&hbin_module).unwrap();
-
-                    let mut file =
-                        File::create(output_file_name).expect("PANIC: Unable to open output file.");
-                    file.write_all(&hbin_contents)
-                        .expect("PANIC: Unable to write output file contents.");
-
-                    // Codegen doesn't require clearing analysis results.*/
-                    continue; 
+                    self.make_fork_join_placements();
+
+                    //let smodule = simple_compile(
+                    //    &self.module,
+                    //    self.def_uses.as_ref().unwrap(),
+                    //    self.reverse_postorders.as_ref().unwrap(),
+                    //    self.typing.as_ref().unwrap(),
+                    //    self.control_subgraphs.as_ref().unwrap(),
+                    //    self.fork_join_maps.as_ref().unwrap(),
+                    //    self.fork_join_nests.as_ref().unwrap(),
+                    //    self.antideps.as_ref().unwrap(),
+                    //    self.bbs.as_ref().unwrap(),
+                    //    self.plans.as_ref().unwrap(),
+                    //    self.fork_join_placements.as_ref().unwrap(),
+                    //);
+                    //println!("{:#?}", smodule);
+
+                    //// Compile LLVM IR into ELF object.
+                    //let llc_process = Command::new("llc")
+                    //    .arg("-filetype=obj")
+                    //    .arg("-O3")
+                    //    .stdin(Stdio::piped())
+                    //    .stdout(Stdio::piped())
+                    //    .spawn()
+                    //    .unwrap();
+                    //llc_process
+                    //    .stdin
+                    //    .as_ref()
+                    //    .unwrap()
+                    //    .write(llvm_ir.as_bytes())
+                    //    .unwrap();
+                    //let elf_object = llc_process.wait_with_output().unwrap().stdout;
+                    //
+                    //// Package manifest and ELF object into the same file.
+                    //let hbin_module = (manifest, elf_object);
+                    //let hbin_contents: Vec<u8> = postcard::to_allocvec(&hbin_module).unwrap();
+                    //
+                    //let mut file =
+                    //    File::create(output_file_name).expect("PANIC: Unable to open output file.");
+                    //file.write_all(&hbin_contents)
+                    //    .expect("PANIC: Unable to write output file contents.");
+                    //
+
+                    // Codegen doesn't require clearing analysis results.
+                    continue;
                 }
             }
 
diff --git a/hercules_opt/src/sroa.rs b/hercules_opt/src/sroa.rs
index ef3eceab898147011f1ab932686afbbda6b4fc45..c48b85ae9d838b22700b55cca78550d6e6a034f2 100644
--- a/hercules_opt/src/sroa.rs
+++ b/hercules_opt/src/sroa.rs
@@ -181,7 +181,6 @@ pub fn sroa(
             }
         })
         .collect();
-    println!("{:?}", to_sroa);
 
     // Perform SROA. TODO: repair def-use when there are multiple product
     // constants to SROA away.
@@ -189,14 +188,10 @@ pub fn sroa(
     for (constant_node_id, constant_id) in to_sroa {
         // Get the field constants to replace the product constant with.
         let product_constant = constants[constant_id.idx()].clone();
-        let constant_fields = product_constant
-            .try_product_fields(types, constants)
-            .unwrap();
-        println!("{:?}", constant_fields);
+        let constant_fields = product_constant.try_product_fields().unwrap();
 
         // DFS to find all data nodes that use the product constant.
         let to_replace = sroa_dfs(constant_node_id, function, def_use);
-        println!("{:?}", to_replace);
 
         // Assemble a mapping from old nodes IDs acting on the product constant
         // to new nodes IDs operating on the field constants.
diff --git a/hercules_samples/fork_join.hir b/hercules_samples/fork_join.hir
index 99e95829578f5893ce507e9643a97727fb85ccc2..fe90da4d85bfcb89a51c2b3c39a30e9af7dd6139 100644
--- a/hercules_samples/fork_join.hir
+++ b/hercules_samples/fork_join.hir
@@ -1,8 +1,16 @@
 fn fork_join<1>() -> u64
-  f_ctrl = fork(start, #0)
-  j_ctrl = join(f_ctrl)
+  f_ctrl1 = fork(start, #0)
+  f_ctrl2 = fork(f_ctrl1, #0)
+  j_ctrl2 = join(f_ctrl2)
+  j_ctrl1 = join(j_ctrl2)
   zero = constant(u64, 0)
-  x = thread_id(f_ctrl)
-  data = reduce(j_ctrl, zero, sum)
-  sum = add(data, x)
-  r = return(j_ctrl, data)
+  x1 = thread_id(f_ctrl1)
+  x1_d = add(x1, x1)
+  x2 = thread_id(f_ctrl2)
+  x2_d = add(x2, x2)
+  data2 = reduce(j_ctrl2, zero, sum2)
+  sum2 = add(data2, x2_d)
+  extra = add(data2, x1_d)
+  data1 = reduce(j_ctrl1, zero, sum1)
+  sum1 = add(data1, extra)
+  r = return(j_ctrl1, data1)
diff --git a/hercules_samples/matmul/matmul.hir b/hercules_samples/matmul/matmul.hir
index 2f0fb67afff4c7707523bd6ae26c87b995e4109b..8c34a31664f2b73ebac9077e8605ac221a266e0a 100644
--- a/hercules_samples/matmul/matmul.hir
+++ b/hercules_samples/matmul/matmul.hir
@@ -1,5 +1,5 @@
 fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2)
-  c = constant(array(f32, #0, #2), zero)
+  c = constant(array(f32, #0, #2), [])
   i_ctrl = fork(start, #0)
   i_idx = thread_id(i_ctrl)
   j_ctrl = fork(i_ctrl, #2)