diff --git a/Cargo.lock b/Cargo.lock
index e525360763eadfc7dccd68bf541dc961667962c2..1e301dc0b414a284180c5ff73e6f0425537585f8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -143,6 +143,13 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
+[[package]]
+name = "hercules_codegen"
+version = "0.1.0"
+dependencies = [
+ "hercules_ir",
+]
+
 [[package]]
 name = "hercules_ir"
 version = "0.1.0"
@@ -152,12 +159,21 @@ dependencies = [
  "ordered-float",
 ]
 
+[[package]]
+name = "hercules_opt"
+version = "0.1.0"
+dependencies = [
+ "hercules_ir",
+]
+
 [[package]]
 name = "hercules_tools"
 version = "0.1.0"
 dependencies = [
  "clap",
+ "hercules_codegen",
  "hercules_ir",
+ "hercules_opt",
  "rand",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index a5227e311467bcf857ec0d14c6e36b2d337b95b1..fc931a3cd3057a8ae8f64a939d51ba14b465caee 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,8 @@
 [workspace]
 
 members = [
+	"hercules_codegen",
 	"hercules_ir",
+	"hercules_opt",
 	"hercules_tools"
 ]
diff --git a/hercules_codegen/Cargo.toml b/hercules_codegen/Cargo.toml
new file mode 100644
index 0000000000000000000000000000000000000000..bd50b13f981284a2b4c13e4fc16f7d23a2b43e45
--- /dev/null
+++ b/hercules_codegen/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "hercules_codegen"
+version = "0.1.0"
+authors = ["Russel Arbore <rarbore2@illinois.edu>"]
+
+[dependencies]
+hercules_ir = { path = "../hercules_ir" }
diff --git a/hercules_codegen/src/gcm.rs b/hercules_codegen/src/gcm.rs
new file mode 100644
index 0000000000000000000000000000000000000000..132ce305e2ea1fe80a7e90e8197d4c22ac9b2500
--- /dev/null
+++ b/hercules_codegen/src/gcm.rs
@@ -0,0 +1,71 @@
+extern crate hercules_ir;
+
+use std::collections::HashMap;
+
+use self::hercules_ir::dataflow::*;
+use self::hercules_ir::def_use::*;
+use self::hercules_ir::dom::*;
+use self::hercules_ir::ir::*;
+use self::hercules_ir::loops::*;
+use self::hercules_ir::subgraph::*;
+
+/*
+ * Top level global code motion function. Assigns each data node to one of its
+ * immediate control use / user nodes, forming (unordered) basic blocks. Returns
+ * the control node / basic block each node is in.
+ */
+pub fn gcm(
+    function: &Function,
+    def_use: &ImmutableDefUseMap,
+    reverse_postorder: &Vec<NodeID>,
+    control_subgraph: &Subgraph,
+    dom: &DomTree,
+    fork_join_map: &HashMap<NodeID, NodeID>,
+) -> Vec<NodeID> {
+    // Step 1: find the immediate control uses and immediate control users of
+    // each node.
+    let immediate_control_uses =
+        forward_dataflow(function, reverse_postorder, |inputs, node_id| {
+            immediate_control_flow(inputs, node_id, function)
+        });
+    let immediate_control_users =
+        backward_dataflow(function, def_use, reverse_postorder, |inputs, node_id| {
+            immediate_control_flow(inputs, node_id, function)
+        });
+
+    // Step 2: calculate loop tree of function.
+    let loops = loops(control_subgraph, NodeID::new(0), dom, fork_join_map);
+
+    // Step 3: find most control dependent, shallowest loop level node for every
+    // node.
+    let num_nodes = function.nodes.len() as u32;
+    (0..function.nodes.len())
+        .map(|idx| {
+            // highest is the deepest (in the dom tree) immediate control use;
+            // lowest is the common dom tree ancestor of all immediate control
+            // users. Both lookups panic if the corresponding set is empty.
+            let highest = dom.lowest_amongst(immediate_control_uses[idx].nodes(num_nodes));
+            let lowest = dom.common_ancestor(immediate_control_users[idx].nodes(num_nodes));
+
+            // Collect into vector to reverse, since we want to traverse down
+            // the dom tree, not up it.
+            let mut chain = dom
+                .chain(lowest, highest)
+                .collect::<Vec<_>>()
+                .into_iter()
+                .rev();
+
+            // The chain always contains at least lowest, so this can't fail.
+            let mut location = chain.next().unwrap();
+            for control_node in chain {
+                // Traverse down the dom tree until we find a loop.
+                if loops.contains(control_node) {
+                    break;
+                }
+                location = control_node;
+            }
+
+            location
+        })
+        .collect()
+}
diff --git a/hercules_codegen/src/lib.rs b/hercules_codegen/src/lib.rs
new file mode 100644
index 0000000000000000000000000000000000000000..fd605651eed486718d591a1aa7d979f3e0c28ad5
--- /dev/null
+++ b/hercules_codegen/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod gcm;
+
+pub use crate::gcm::*;
diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs
index 0cb95e31f96b2648b9e3988466d0b6102641415e..c3f3e9f900116e79f10803f3225fa5c5565f0bbb 100644
--- a/hercules_ir/src/dataflow.rs
+++ b/hercules_ir/src/dataflow.rs
@@ -1,6 +1,7 @@
 extern crate bitvec;
 
-use dataflow::bitvec::prelude::*;
+use self::bitvec::prelude::*;
+use self::bitvec::slice::*;
 
 use crate::*;
 
@@ -33,7 +34,7 @@ where
     L: Semilattice,
     F: FnMut(&[&L], NodeID) -> L,
 {
-    forward_dataflow_global(function, reverse_postorder, |global_outs, node_id| {
+    dataflow_global(function, reverse_postorder, |global_outs, node_id| {
         let uses = get_uses(&function.nodes[node_id.idx()]);
         let pred_outs: Vec<_> = uses
             .as_ref()
@@ -45,16 +46,42 @@ where
 }
 
 /*
- * The previous forward dataflow routine wraps around this dataflow routine,
+ * Top level backward dataflow function. Instead of passing the uses' lattice
+ * values to the flow function, passes in the users' lattice values.
+ */
+pub fn backward_dataflow<L, F>(
+    function: &Function,
+    def_use: &ImmutableDefUseMap,
+    reverse_postorder: &Vec<NodeID>,
+    mut flow_function: F,
+) -> Vec<L>
+where
+    L: Semilattice,
+    F: FnMut(&[&L], NodeID) -> L,
+{
+    // Backward analyses visit nodes in postorder, which is the given reverse
+    // postorder, reversed.
+    let postorder: Vec<NodeID> = reverse_postorder.iter().rev().copied().collect();
+    dataflow_global(function, &postorder, |global_outs, node_id| {
+        // Gather the current lattice value of each user of this node.
+        let mut succ_outs = Vec::new();
+        for user in def_use.get_users(node_id).as_ref().iter() {
+            succ_outs.push(&global_outs[user.idx()]);
+        }
+        flow_function(&succ_outs, node_id)
+    })
+}
+
+/*
+ * The previous dataflow routines wrap around this dataflow routine,
  * where the flow function doesn't just have access to this nodes input lattice
  * values, but also all the current lattice values for all the nodes. This is
  * useful for some dataflow analyses, such as reachability. The "global" in
- * forward_dataflow_global refers to having a global view of the out lattice
- * values.
+ * dataflow_global refers to having a global view of the out lattice values.
  */
-pub fn forward_dataflow_global<L, F>(
+pub fn dataflow_global<L, F>(
     function: &Function,
-    reverse_postorder: &Vec<NodeID>,
+    order: &Vec<NodeID>,
     mut flow_function: F,
 ) -> Vec<L>
 where
@@ -62,9 +89,7 @@ where
     F: FnMut(&[L], NodeID) -> L,
 {
     // Step 1: create initial set of "out" points.
-    let start_node_output = flow_function(&[], NodeID::new(0));
-    let mut first_ins = vec![L::top(); function.nodes.len()];
-    first_ins[0] = start_node_output;
+    let first_ins = vec![L::top(); function.nodes.len()];
     let mut outs: Vec<L> = (0..function.nodes.len())
         .map(|id| flow_function(&first_ins, NodeID::new(id)))
         .collect();
@@ -73,9 +98,9 @@ where
     loop {
         let mut change = false;
 
-        // Iterate nodes in reverse post order.
-        for node_id in reverse_postorder {
-            // Compute new "out" value from predecessor "out" values.
+        // Iterate nodes in specified order.
+        for node_id in order {
+            // Compute new "out" value from previous "out" values.
             let new_out = flow_function(&outs, *node_id);
             if outs[node_id.idx()] != new_out {
                 change = true;
@@ -157,6 +182,16 @@ impl IntersectNodeSet {
             IntersectNodeSet::Full => true,
         }
     }
+
+    /*
+     * Iterate over the IDs in this set. num_nodes bounds the Full case.
+     */
+    pub fn nodes(&self, num_nodes: u32) -> NodeSetIterator {
+        match self {
+            Self::Empty => NodeSetIterator::Empty,
+            Self::Full => NodeSetIterator::Full(0, num_nodes),
+            Self::Bits(bits) => NodeSetIterator::Bits(bits.iter_ones().map(NodeID::new)),
+        }
 }
 
 impl Semilattice for IntersectNodeSet {
@@ -205,6 +240,16 @@ impl UnionNodeSet {
             UnionNodeSet::Full => true,
         }
     }
+
+    /*
+     * Iterate over the IDs in this set. num_nodes bounds the Full case.
+     */
+    pub fn nodes(&self, num_nodes: u32) -> NodeSetIterator {
+        match self {
+            Self::Empty => NodeSetIterator::Empty,
+            Self::Full => NodeSetIterator::Full(0, num_nodes),
+            Self::Bits(bits) => NodeSetIterator::Bits(bits.iter_ones().map(NodeID::new)),
+        }
 }
 
 impl Semilattice for UnionNodeSet {
@@ -234,6 +279,33 @@ impl Semilattice for UnionNodeSet {
     }
 }
 
+/*
+ * Iterator over the node IDs in a node set. Full(next, end) counts up to end.
+ */
+#[derive(Clone, Debug)]
+pub enum NodeSetIterator<'a> {
+    Empty,
+    Bits(std::iter::Map<IterOnes<'a, u8, LocalBits>, fn(usize) -> ir::NodeID>),
+    Full(u32, u32),
+}
+
+impl<'a> Iterator for NodeSetIterator<'a> {
+    type Item = NodeID;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            NodeSetIterator::Empty => None,
+            NodeSetIterator::Bits(ones) => ones.next(),
+            NodeSetIterator::Full(next, end) if *next < *end => {
+                let id = NodeID::new(*next as usize);
+                *next += 1;
+                Some(id)
+            }
+            NodeSetIterator::Full(_, _) => None,
+        }
+    }
+}
+
 /*
  * Below are some common flow functions. They all take a slice of semilattice
  * references as their first argument, and a node ID as their second. However,
@@ -273,3 +345,47 @@ pub fn control_output_flow(
 
     out
 }
+
+/*
+ * Flow function for collecting all of a data node's immediate uses / users of
+ * control nodes. Useful for code generation. Since this is for immediate uses /
+ * users of control nodes, control node uses / users do not propagate through
+ * control nodes, or through control output nodes (phis, thread IDs, collects).
+ */
+pub fn immediate_control_flow(
+    inputs: &[&UnionNodeSet],
+    mut node_id: NodeID,
+    function: &Function,
+) -> UnionNodeSet {
+    // Step 1: phis, thread IDs, and collects are replaced by their control
+    // node and ignore their inputs. Every other node unions its inputs.
+    let mut out = match &function.nodes[node_id.idx()] {
+        Node::Phi { control, data: _ }
+        | Node::ThreadID { control }
+        | Node::Collect { control, data: _ } => {
+            node_id = *control;
+            UnionNodeSet::top()
+        }
+        _ => inputs
+            .iter()
+            .fold(UnionNodeSet::top(), |acc, input| UnionNodeSet::meet(&acc, input)),
+    };
+    let node = &function.nodes[node_id.idx()];
+
+    // Step 2: figure out if this node is a control node. A ReadProd counts as
+    // control when the node it reads from is strictly control.
+    let control = match node {
+        Node::ReadProd { prod, index: _ } => function.nodes[prod.idx()].is_strictly_control(),
+        _ => node.is_strictly_control(),
+    };
+
+    // Step 3: a control node discards whatever was propagated, and reports
+    // only itself.
+    if control {
+        let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()];
+        singular.set(node_id.idx(), true);
+        out = UnionNodeSet::Bits(singular);
+    }
+
+    out
+}
diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs
index fc5fc3983ca0683cdad46b336ef5479041dd91f2..b732db550c994bd673dfaa42335c33824dc965ba 100644
--- a/hercules_ir/src/dom.rs
+++ b/hercules_ir/src/dom.rs
@@ -1,21 +1,28 @@
-extern crate bitvec;
+use std::collections::HashMap;
 
 use crate::*;
 
-use std::collections::HashMap;
-
 /*
  * Custom type for storing a dominator tree. For each control node, store its
- * immediate dominator.
+ * immediate dominator, and its level in the dominator tree. Dominator tree
+ * levels are used for finding common ancestors.
  */
 #[derive(Debug, Clone)]
 pub struct DomTree {
-    idom: HashMap<NodeID, NodeID>,
+    root: NodeID,
+    idom: HashMap<NodeID, (u32, NodeID)>,
+}
+
+#[derive(Debug, Clone)]
+pub struct DomChainIterator<'a> {
+    dom: &'a DomTree,
+    iter: Option<NodeID>,
+    top: NodeID,
 }
 
 impl DomTree {
     pub fn imm_dom(&self, x: NodeID) -> Option<NodeID> {
-        self.idom.get(&x).map(|x| x.clone())
+        self.idom.get(&x).map(|x| x.1)
     }
 
     pub fn does_imm_dom(&self, a: NodeID, b: NodeID) -> bool {
@@ -47,19 +54,90 @@ impl DomTree {
         self.idom.contains_key(&x)
     }
 
+    pub fn contains(&self, x: NodeID) -> bool {
+        x == self.root || self.idom.contains_key(&x)
+    }
+
     /*
-     * Typically, node ID 0 is the root of the dom tree. Under this assumption,
-     * this function checks if a node is in the dom tree.
+     * Find the node with the lowest level in the dom tree amongst the nodes
+     * given. Although not technically necessary, you're probably using this
+     * function wrong if the nodes in the iterator do not form a dominance
+     * chain.
      */
-    pub fn contains_conventional(&self, x: NodeID) -> bool {
-        x == NodeID::new(0) || self.idom.contains_key(&x)
+    pub fn lowest_amongst<I>(&self, x: I) -> NodeID
+    where
+        I: Iterator<Item = NodeID>,
+    {
+        // The root has no entry in idom, so its level (0) is special cased.
+        let level_of = |node: NodeID| {
+            if node == self.root {
+                0
+            } else {
+                self.idom[&node].0
+            }
+        };
+        // Panics if the iterator is empty, or yields a node outside the tree.
+        x.max_by_key(|node| level_of(*node)).unwrap()
+    }
+
+    pub fn common_ancestor<I>(&self, x: I) -> NodeID
+    where
+        I: Iterator<Item = NodeID>,
+    {
+        let mut positions: HashMap<NodeID, u32> = x
+            .map(|node| (node, if node == self.root { 0 } else { self.idom[&node].0 }))
+            .collect();
+        let mut current_level = *positions.values().max().unwrap();
+        while positions.len() > 1 {
+            // Replace every deepest candidate by its immediate dominator.
+            let deepest: Vec<NodeID> = positions
+                .iter()
+                .filter_map(|(node, level)| (*level == current_level).then(|| *node))
+                .collect();
+            for node in deepest {
+                positions.remove(&node);
+                let (level, parent) = self.idom[&node];
+                assert!(level == current_level);
+                positions.insert(parent, level - 1);
+            }
+            current_level -= 1;
+        }
+        positions.into_keys().next().unwrap()
     }
 
-    pub fn get_underlying_map(&self) -> &HashMap<NodeID, NodeID> {
+    pub fn chain<'a>(&'a self, bottom: NodeID, top: NodeID) -> DomChainIterator<'a> {
+        DomChainIterator {
+            dom: self,
+            iter: Some(bottom),
+            top,
+        }
+    }
+
+    pub fn get_underlying_map(&self) -> &HashMap<NodeID, (u32, NodeID)> {
         &self.idom
     }
 }
 
+impl<'a> Iterator for DomChainIterator<'a> {
+    type Item = NodeID;
+
+    // Yield the current node, then step to its immediate dominator. Iteration
+    // ends after yielding top.
+    fn next(&mut self) -> Option<Self::Item> {
+        let ret = self.iter?;
+        self.iter = if ret == self.top {
+            None
+        } else {
+            // Running out of dominators means top doesn't dominate bottom.
+            let idom = self.dom.imm_dom(ret).unwrap_or_else(|| {
+                panic!("In DomChainIterator, top node doesn't dominate bottom node.")
+            });
+            Some(idom)
+        };
+        Some(ret)
+    }
+}
+
 /*
  * Top level function for calculating dominator trees. Uses the semi-NCA
  * algorithm, as described in "Finding Dominators in Practice".
@@ -75,7 +153,7 @@ pub fn dominator(subgraph: &Subgraph, root: NodeID) -> DomTree {
     let mut idom = HashMap::new();
     for w in preorder[1..].iter() {
         // Each idom starts as the parent node.
-        idom.insert(*w, parents[w]);
+        idom.insert(*w, (0, parents[w]));
     }
 
     // Step 2: define snca_compress, which will be used to compute semi-
@@ -116,12 +194,28 @@ pub fn dominator(subgraph: &Subgraph, root: NodeID) -> DomTree {
     // Step 4: compute idom.
     for v_n in 1..preorder.len() {
         let v = preorder[v_n];
-        while node_numbers[&idom[&v]] > semi[v_n] {
-            *idom.get_mut(&v).unwrap() = idom[&idom[&v]];
+        while node_numbers[&idom[&v].1] > semi[v_n] {
+            *idom.get_mut(&v).unwrap() = idom[&idom[&v].1];
+        }
+    }
+
+    // Step 5: compute levels in idom.
+    let mut change = true;
+    while change {
+        change = false;
+        for node in preorder[1..].iter() {
+            let (level, parent) = idom[node];
+            if level == 0 && parent == root {
+                idom.get_mut(node).unwrap().0 = 1;
+                change = true;
+            } else if level == 0 && idom[&parent].0 != 0 {
+                idom.get_mut(node).unwrap().0 = 1 + idom[&parent].0;
+                change = true;
+            }
         }
     }
 
-    DomTree { idom }
+    DomTree { root, idom }
 }
 
 fn preorder(subgraph: &Subgraph, root: NodeID) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) {
diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs
index 943d04938a30add8a201f32a2245518b3ab33717..6d4364cd5518321e0d3e41d91bccad2e840166ab 100644
--- a/hercules_ir/src/lib.rs
+++ b/hercules_ir/src/lib.rs
@@ -1,24 +1,20 @@
 pub mod build;
-pub mod ccp;
 pub mod dataflow;
-pub mod dce;
 pub mod def_use;
 pub mod dom;
-pub mod gvn;
 pub mod ir;
+pub mod loops;
 pub mod parse;
 pub mod subgraph;
 pub mod typecheck;
 pub mod verify;
 
 pub use crate::build::*;
-pub use crate::ccp::*;
 pub use crate::dataflow::*;
-pub use crate::dce::*;
 pub use crate::def_use::*;
 pub use crate::dom::*;
-pub use crate::gvn::*;
 pub use crate::ir::*;
+pub use crate::loops::*;
 pub use crate::parse::*;
 pub use crate::subgraph::*;
 pub use crate::typecheck::*;
diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs
new file mode 100644
index 0000000000000000000000000000000000000000..cc302998b92062dcf3ff9c794566be5d409b4629
--- /dev/null
+++ b/hercules_ir/src/loops.rs
@@ -0,0 +1,133 @@
+extern crate bitvec;
+
+use std::collections::HashMap;
+
+use self::bitvec::prelude::*;
+
+use crate::*;
+
+/*
+ * Custom type for storing a loop tree. Each node corresponds to a single loop
+ * or a fork join pair in the IR graph. Each node in the tree corresponds to
+ * some subset of the overall IR graph. The root node corresponds to the entire
+ * IR graph. The children of the root correspond to the top-level loops and fork
+ * join pairs, and so on. Each node in the loop tree has a representative
+ * "header" node. For normal loops, this is the region node branched to by a
+ * dominated if node. For fork join pairs, this is the fork node. A loop is a
+ * top-level loop if its parent is the root node of the subgraph.
+ */
+#[derive(Debug, Clone)]
+pub struct LoopTree {
+    root: NodeID,
+    loops: HashMap<NodeID, (BitVec<u8, Lsb0>, NodeID)>,
+}
+
+impl LoopTree {
+    pub fn contains(&self, x: NodeID) -> bool {
+        x == self.root || self.loops.contains_key(&x)
+    }
+}
+
+/*
+ * Top level function for calculating loop trees.
+ */
+pub fn loops(
+    subgraph: &Subgraph,
+    root: NodeID,
+    dom: &DomTree,
+    fork_join_map: &HashMap<NodeID, NodeID>,
+) -> LoopTree {
+    // Step 1: collect loop back edges. Any successor dominating its
+    // predecessor is the destination of a loop back edge.
+    let mut loop_back_edges = vec![];
+    for node in subgraph.iter() {
+        for succ in subgraph.succs(*node) {
+            if dom.does_dom(succ, *node) {
+                loop_back_edges.push((*node, succ));
+            }
+        }
+    }
+
+    // Step 2: collect "edges" from joins to forks. Technically, this doesn't
+    // correspond to a real edge in the graph. However, our loop tree includes
+    // fork join pairs as loops, so create a phantom loop back edge.
+    loop_back_edges.extend(fork_join_map.iter().map(|(fork, join)| (*join, *fork)));
+
+    // Step 3: find control nodes inside each loop. For a particular natural
+    // loop with header d and a back edge from node n to d, the nodes in the
+    // loop are d itself, and all nodes with a path to n not going through d.
+    // This iterator is lazy; reachability is computed as step 4 consumes it.
+    let loop_contents = loop_back_edges.iter().map(|(n, d)| {
+        let mut contents = loop_reachability(*n, *d, subgraph);
+        contents.set(d.idx(), true);
+        (d, contents)
+    });
+
+    // Step 4: merge loops with same header into a single natural loop. A
+    // header seen before has its previous contents unioned with the new ones.
+    let mut loops: HashMap<NodeID, BitVec<u8, Lsb0>> = HashMap::new();
+    for (header, contents) in loop_contents {
+        let merged = match loops.remove(header) {
+            Some(old_contents) => old_contents | contents,
+            None => contents,
+        };
+        loops.insert(*header, merged);
+    }
+
+    // Step 5: figure out loop tree edges. A loop with header a can only be an
+    // outer loop of a loop with header b if a dominates b. Walk up the dom
+    // tree from each header; the first dominating header whose loop contains
+    // this header becomes its parent.
+    let loops = loops
+        .iter()
+        .map(|(header, contents)| {
+            let mut ancestor = *header;
+            while let Some(next_ancestor) = dom.imm_dom(ancestor) {
+                ancestor = next_ancestor;
+                if let Some(outer_contents) = loops.get(&ancestor) {
+                    if outer_contents[header.idx()] {
+                        return (*header, (contents.clone(), ancestor));
+                    }
+                }
+            }
+            // No enclosing loop was found, so this is a top-level loop.
+            (*header, (contents.clone(), root))
+        })
+        .collect();
+
+    LoopTree { root, loops }
+}
+
+fn loop_reachability(n: NodeID, d: NodeID, subgraph: &Subgraph) -> BitVec<u8, Lsb0> {
+    // Bitset indexed by node ID in the original function, initially all unset.
+    let unvisited = bitvec![u8, Lsb0; 0; subgraph.original_num_nodes() as usize];
+
+    // n is the root of the traversal, finding d is a termination condition. The
+    // returned bitset marks every node the traversal reached, including n.
+    loop_reachability_helper(n, d, subgraph, unvisited)
+}
+
+fn loop_reachability_helper(
+    n: NodeID,
+    d: NodeID,
+    subgraph: &Subgraph,
+    mut visited: BitVec<u8, Lsb0>,
+) -> BitVec<u8, Lsb0> {
+    // Walk predecessors with an explicit worklist rather than recursion, so
+    // that long control flow chains can't overflow the call stack.
+    let mut worklist = vec![n];
+    while let Some(node) = worklist.pop() {
+        // If already visited, there's nothing more to do for this node.
+        if visited[node.idx()] {
+            continue;
+        }
+        visited.set(node.idx(), true);
+        // Queue predecessors, but don't traverse d.
+        for pred in subgraph.preds(node) {
+            if pred != d {
+                worklist.push(pred);
+            }
+        }
+    }
+    visited
+}
diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs
index dd7cb2a60f0511c985fe876c9a88f2cb16f1f695..290abd7475c4af18e39f1f42f634c9d63f13f091 100644
--- a/hercules_ir/src/subgraph.rs
+++ b/hercules_ir/src/subgraph.rs
@@ -15,6 +15,7 @@ pub struct Subgraph {
     forward_edges: Vec<u32>,
     first_backward_edges: Vec<u32>,
     backward_edges: Vec<u32>,
+    original_num_nodes: u32,
 }
 
 pub struct SubgraphIterator<'a> {
@@ -36,15 +37,32 @@ impl<'a> Iterator for SubgraphIterator<'a> {
     }
 }
 
+impl IntoIterator for Subgraph {
+    type Item = NodeID;
+    type IntoIter = std::vec::IntoIter<Self::Item>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.nodes.into_iter()
+    }
+}
+
 impl Subgraph {
     pub fn num_nodes(&self) -> u32 {
         self.nodes.len() as u32
     }
 
+    pub fn original_num_nodes(&self) -> u32 {
+        self.original_num_nodes
+    }
+
     pub fn contains_node(&self, id: NodeID) -> bool {
         self.node_numbers.contains_key(&id)
     }
 
+    pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, NodeID> {
+        self.nodes.iter()
+    }
+
     pub fn preds(&self, id: NodeID) -> SubgraphIterator {
         let number = self.node_numbers[&id];
         if ((number + 1) as usize) < self.first_backward_edges.len() {
@@ -86,6 +104,7 @@ impl Subgraph {
             forward_edges,
             mut first_backward_edges,
             mut backward_edges,
+            original_num_nodes,
         } = self;
 
         // Since we need to add a "new" root to the subgraph, we first need to
@@ -138,6 +157,7 @@ impl Subgraph {
             forward_edges: backward_edges,
             first_backward_edges: new_first_forward_edges,
             backward_edges: new_forward_edges,
+            original_num_nodes,
         }
     }
 }
@@ -159,6 +179,7 @@ where
         forward_edges: vec![],
         first_backward_edges: vec![],
         backward_edges: vec![],
+        original_num_nodes: function.nodes.len() as u32,
     };
 
     // Step 1: collect predicated nodes.
diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs
index 4c9db60bb7a690652ae08d3cca4365cb7a207470..00513d52db117232cd0aa12220fd8c61766dab27 100644
--- a/hercules_ir/src/verify.rs
+++ b/hercules_ir/src/verify.rs
@@ -3,7 +3,7 @@ extern crate bitvec;
 use std::collections::HashMap;
 use std::iter::zip;
 
-use verify::bitvec::prelude::*;
+use self::bitvec::prelude::*;
 
 use crate::*;
 
@@ -19,6 +19,7 @@ pub fn verify(
         Vec<ImmutableDefUseMap>,
         Vec<Vec<NodeID>>,
         ModuleTyping,
+        Vec<Subgraph>,
         Vec<DomTree>,
         Vec<DomTree>,
         Vec<HashMap<NodeID, NodeID>>,
@@ -79,10 +80,17 @@ pub fn verify(
         )?;
     }
 
+    // Recalculate subgraphs for return since postdominator analysis modifies
+    // them.
+    let subgraphs: Vec<_> = zip(module.functions.iter(), def_uses.iter())
+        .map(|(function, def_use)| control_subgraph(function, def_use))
+        .collect();
+
     Ok((
         def_uses,
         reverse_postorders,
         typing,
+        subgraphs,
         doms,
         postdoms,
         fork_join_maps,
@@ -392,7 +400,7 @@ fn verify_dominance_relationships(
             // If the node to be added to the to_check vector isn't even in the
             // dominator tree, don't bother. It doesn't need to be checked for
             // dominance relations.
-            if !dom.contains_conventional(this_id) {
+            if !dom.contains(this_id) {
                 continue;
             }
 
@@ -419,7 +427,7 @@ fn verify_dominance_relationships(
                     // Verify that uses of phis / collect nodes are dominated
                     // by the corresponding region / join nodes, respectively.
                     Node::Phi { control, data: _ } | Node::Collect { control, data: _ } => {
-                        if dom.contains_conventional(this_id) && !dom.does_dom(control, this_id) {
+                        if dom.contains(this_id) && !dom.does_dom(control, this_id) {
                             Err(format!(
                                 "{} node (ID {}) doesn't dominate its use (ID {}).",
                                 function.nodes[pred_idx].upper_case_name(),
@@ -431,7 +439,7 @@ fn verify_dominance_relationships(
                     // Verify that uses of thread ID nodes are dominated by the
                     // corresponding fork nodes.
                     Node::ThreadID { control } => {
-                        if dom.contains_conventional(this_id) && !dom.does_dom(control, this_id) {
+                        if dom.contains(this_id) && !dom.does_dom(control, this_id) {
                             Err(format!(
                                 "ThreadID node (ID {}) doesn't dominate its use (ID {}).",
                                 pred_idx,
@@ -445,7 +453,7 @@ fn verify_dominance_relationships(
                         // flows through the collect node out of the fork-join,
                         // because after the collect, the thread ID is no longer
                         // considered an immediate control output use.
-                        if postdom.contains_conventional(this_id)
+                        if postdom.contains(this_id)
                             && !postdom.does_dom(*fork_join_map.get(&control).unwrap(), this_id)
                         {
                             Err(format!("ThreadID node's (ID {}) fork's join doesn't postdominate its use (ID {}).", pred_idx, this_id.idx()))?;
diff --git a/hercules_opt/Cargo.toml b/hercules_opt/Cargo.toml
new file mode 100644
index 0000000000000000000000000000000000000000..47bd9bd5dd8c40935c9cf708661d4c8bfe2e3b83
--- /dev/null
+++ b/hercules_opt/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "hercules_opt"
+version = "0.1.0"
+authors = ["Russel Arbore <rarbore2@illinois.edu>"]
+
+[dependencies]
+hercules_ir = { path = "../hercules_ir" }
diff --git a/hercules_ir/src/ccp.rs b/hercules_opt/src/ccp.rs
similarity index 93%
rename from hercules_ir/src/ccp.rs
rename to hercules_opt/src/ccp.rs
index ded30b1ff0f10f63f9f80b649fc63807c318367c..fc5bc2166fce93c3213f3d1ef7265971fad68b51 100644
--- a/hercules_ir/src/ccp.rs
+++ b/hercules_opt/src/ccp.rs
@@ -1,7 +1,11 @@
+extern crate hercules_ir;
+
 use std::collections::HashMap;
 use std::iter::zip;
 
-use crate::*;
+use self::hercules_ir::dataflow::*;
+use self::hercules_ir::def_use::*;
+use self::hercules_ir::ir::*;
 
 /*
  * The ccp lattice tracks, for each node, the following information:
@@ -134,7 +138,7 @@ pub fn ccp(
     reverse_postorder: &Vec<NodeID>,
 ) {
     // Step 1: run ccp analysis to understand the function.
-    let result = forward_dataflow_global(&function, reverse_postorder, |inputs, node_id| {
+    let result = dataflow_global(&function, reverse_postorder, |inputs, node_id| {
         ccp_flow_function(inputs, node_id, &function, &constants)
     });
 
@@ -268,6 +272,10 @@ pub fn ccp(
             // remove this branch node.
             if let None = reachable_users.next() {
                 // The user is a ReadProd node, which in turn has one user.
+                assert!(
+                    def_use.get_users(*the_reachable_user).len() == 1,
+                    "Control ReadProd node doesn't have exactly one user."
+                );
                 let target = def_use.get_users(*the_reachable_user)[0];
 
                 // For each use in the target of the reachable ReadProd, turn it
@@ -287,6 +295,76 @@ pub fn ccp(
             }
         }
     }
+
+    // Step 4: collapse region chains.
+    collapse_region_chains(function, def_use);
+}
+
+/*
+ * Top level function to collapse region chains. A chain is a list of at least
+ * one region node that takes only one control input. Region chains can be
+ * deleted: the user of the tail of the chain is rewired to directly use the
+ * control input of the head of the chain.
+ */
+pub fn collapse_region_chains(function: &mut Function, def_use: &ImmutableDefUseMap) {
+    // Loop over all region nodes, modifying the function as we go.
+    // NOTE(review): def_use is never updated here, so users rewritten by an
+    // earlier iteration look stale to later ones - confirm this is benign.
+    for id in (0..function.nodes.len()).map(NodeID::new) {
+        if let Node::Region { preds } = &function.nodes[id.idx()] {
+            if preds.len() == 1 {
+                // Step 1: bridge gap between use and user.
+                let predecessor = preds[0];
+                let successor = def_use
+                    .get_users(id)
+                    .iter()
+                    .find(|x| !function.nodes[x.idx()].is_phi())
+                    .expect("Region node doesn't have a non-phi user.");
+
+                // Set successor's use of this region to use the region's use.
+                for u in get_uses_mut(&mut function.nodes[successor.idx()]).as_mut() {
+                    if **u == id {
+                        **u = predecessor;
+                    }
+                }
+
+                // Delete this region.
+                function.nodes[id.idx()] = Node::Start;
+
+                // Step 2: bridge gap between uses and users of corresponding
+                // phi nodes.
+                let phis: Vec<NodeID> = def_use
+                    .get_users(id)
+                    .iter()
+                    .copied()
+                    .filter(|x| function.nodes[x.idx()].is_phi())
+                    .collect();
+                for phi_id in phis {
+                    let data_uses =
+                        if let Node::Phi { control, data } = &function.nodes[phi_id.idx()] {
+                            assert!(*control == id);
+                            data
+                        } else {
+                            panic!()
+                        };
+                    assert!(data_uses.len() == 1, "Phi node doesn't have exactly one data use, while corresponding region had exactly one control use.");
+                    let predecessor = data_uses[0];
+
+                    // Set successors' use of this phi to use the phi's use.
+                    for successor in def_use.get_users(phi_id) {
+                        for u in get_uses_mut(&mut function.nodes[successor.idx()]).as_mut() {
+                            if **u == phi_id {
+                                **u = predecessor;
+                            }
+                        }
+                    }
+
+                    // Delete this phi.
+                    function.nodes[phi_id.idx()] = Node::Start;
+                }
+            }
+        }
+    }
 }
 
 fn ccp_flow_function(
diff --git a/hercules_ir/src/dce.rs b/hercules_opt/src/dce.rs
similarity index 94%
rename from hercules_ir/src/dce.rs
rename to hercules_opt/src/dce.rs
index 1f56d8648a11f707019353ba9493d32e0005113b..255402902aa6ec0bcc240cbb7bd18a9eb8945526 100644
--- a/hercules_ir/src/dce.rs
+++ b/hercules_opt/src/dce.rs
@@ -1,4 +1,7 @@
-use crate::*;
+extern crate hercules_ir;
+
+use self::hercules_ir::def_use::*;
+use self::hercules_ir::ir::*;
 
 /*
  * Top level function to run dead code elimination. Deletes nodes by setting
diff --git a/hercules_ir/src/gvn.rs b/hercules_opt/src/gvn.rs
similarity index 97%
rename from hercules_ir/src/gvn.rs
rename to hercules_opt/src/gvn.rs
index c8f77244d0b12d90c4a395c69c318287d379cad9..e8337e609b3ae881c3e8a012d9cd09e212eee2c1 100644
--- a/hercules_ir/src/gvn.rs
+++ b/hercules_opt/src/gvn.rs
@@ -1,6 +1,9 @@
+extern crate hercules_ir;
+
 use std::collections::HashMap;
 
-use crate::*;
+use self::hercules_ir::def_use::*;
+use self::hercules_ir::ir::*;
 
 /*
  * Top level function to run global value numbering. In the sea of nodes, GVN is
diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs
new file mode 100644
index 0000000000000000000000000000000000000000..309c4e2393057ef5460d2bf3eaf4465f016ffdaf
--- /dev/null
+++ b/hercules_opt/src/lib.rs
@@ -0,0 +1,7 @@
+pub mod ccp;
+pub mod dce;
+pub mod gvn;
+
+pub use crate::ccp::*;
+pub use crate::dce::*;
+pub use crate::gvn::*;
diff --git a/hercules_tools/Cargo.toml b/hercules_tools/Cargo.toml
index 458de0e0e86e18f89a3bbb41b5a19321413f5a35..412f1022638722bd15023dbb289642634d654733 100644
--- a/hercules_tools/Cargo.toml
+++ b/hercules_tools/Cargo.toml
@@ -7,7 +7,13 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 name = "hercules_dot"
 path = "src/hercules_dot/main.rs"
 
+[[bin]]
+name = "hercules_cpu"
+path = "src/hercules_cpu/main.rs"
+
 [dependencies]
 clap = { version = "*", features = ["derive"] }
 hercules_ir = { path = "../hercules_ir" }
+hercules_opt = { path = "../hercules_opt" }
+hercules_codegen = { path = "../hercules_codegen" }
 rand = "*"
diff --git a/hercules_tools/src/hercules_cpu/main.rs b/hercules_tools/src/hercules_cpu/main.rs
new file mode 100644
index 0000000000000000000000000000000000000000..bb0a49963f2b0422aa97489addf1bcbefa241c16
--- /dev/null
+++ b/hercules_tools/src/hercules_cpu/main.rs
@@ -0,0 +1,77 @@
+extern crate clap;
+
+use std::fs::File;
+use std::io::prelude::*;
+
+use clap::Parser;
+
+#[derive(Parser, Debug)] // Command-line arguments of the hercules_cpu tool; main() obtains them via Args::parse().
+#[command(author, version, about, long_about = None)]
+struct Args {
+    hir_file: String, // Path of the input file; main() opens it and parses it as textual Hercules IR.
+
+    #[arg(short, long, default_value_t = String::new())]
+    output: String, // Short/long option, empty string by default. NOTE(review): never read in this file.
+}
+
+fn main() { // Driver: parse a .hir file, run ccp/dce/gvn on it, re-verify, then print gcm()'s result per function.
+    let args = Args::parse();
+    if !args.hir_file.ends_with(".hir") { // A different extension only triggers a warning; the file is still processed.
+        eprintln!("WARNING: Running hercules_cpu on a file without a .hir extension - interpreting as a textual Hercules IR file.");
+    }
+
+    let mut file = File::open(args.hir_file).expect("PANIC: Unable to open input file.");
+    let mut contents = String::new(); // Receives the full text of the input file.
+    file.read_to_string(&mut contents)
+        .expect("PANIC: Unable to read input file contents.");
+    let mut module = // Every fallible step in this function aborts through expect() with a "PANIC: ..." message.
+        hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file.");
+    let (def_uses, reverse_postorders, _typing, _subgraphs, _doms, _postdoms, _fork_join_maps) =
+        hercules_ir::verify::verify(&mut module) // First verify(): only def_uses and reverse_postorders are used below.
+            .expect("PANIC: Failed to verify Hercules IR module.");
+
+    let mut module = module.map( // Optimization pipeline; the closure gets a function plus its id and returns the updated pieces.
+        |(mut function, id), (types, mut constants, dynamic_constants)| {
+            hercules_opt::ccp::ccp( // Step 1: ccp, fed with the analyses from the first verify(), indexed by id.
+                &mut function,
+                &mut constants,
+                &def_uses[id.idx()],
+                &reverse_postorders[id.idx()],
+            );
+            hercules_opt::dce::dce(&mut function); // Step 2: dead code elimination after ccp.
+            function.delete_gravestones(); // Presumably drops the placeholder nodes left by deletions - TODO confirm.
+
+            let def_use = hercules_ir::def_use::def_use(&function); // Recomputed because the function changed above.
+            hercules_opt::gvn::gvn(&mut function, &constants, &def_use); // Step 3: global value numbering.
+            hercules_opt::dce::dce(&mut function); // Step 4: dead code elimination again, this time after gvn.
+            function.delete_gravestones();
+
+            (function, (types, constants, dynamic_constants))
+        },
+    );
+    let (def_uses, reverse_postorders, _typing, subgraphs, doms, _postdoms, fork_join_maps) =
+        hercules_ir::verify::verify(&mut module) // Second verify(): fresh analyses for the optimized module.
+            .expect("PANIC: Failed to verify Hercules IR module.");
+
+    let bbs: Vec<_> = module // One entry per function, in module.functions order.
+        .functions
+        .iter()
+        .enumerate()
+        .map(|(idx, function)| {
+            hercules_codegen::gcm::gcm( // Each analysis vector is indexed by the function's position idx.
+                function,
+                &def_uses[idx],
+                &reverse_postorders[idx],
+                &subgraphs[idx],
+                &doms[idx],
+                &fork_join_maps[idx],
+            )
+            .iter()
+            .map(|id| id.idx()) // Reduce each returned id to its plain index.
+            .enumerate() // Pair it with its position; presumably node position -> basic block (per `bbs`) - TODO confirm.
+            .collect::<Vec<_>>()
+        })
+        .collect();
+
+    println!("{:?}", bbs); // The Debug dump on stdout is the only result. NOTE(review): args.output is never read.
+}
diff --git a/hercules_tools/src/hercules_dot/dot.rs b/hercules_tools/src/hercules_dot/dot.rs
index 6f41f85b9336fb0f071be20b3274b53723bc1544..f71a40f18fbae210d1111e19e9094a9f5f12dd97 100644
--- a/hercules_tools/src/hercules_dot/dot.rs
+++ b/hercules_tools/src/hercules_dot/dot.rs
@@ -67,7 +67,7 @@ pub fn write_dot<W: Write>(
         // Step 2: draw dominance edges in dark green. Don't draw post dominance
         // edges because then xdot lays out the graph strangely.
         let dom = &doms[function_id.idx()];
-        for (child_id, parent_id) in dom.get_underlying_map() {
+        for (child_id, (_, parent_id)) in dom.get_underlying_map() {
             write_edge(
                 *child_id,
                 function_id,
diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs
index bb3efe5794d48bec125397814114cf452d50560b..e3543fe5b6e860a4456ea20fe60a2529ea2031d3 100644
--- a/hercules_tools/src/hercules_dot/main.rs
+++ b/hercules_tools/src/hercules_dot/main.rs
@@ -34,30 +34,30 @@ fn main() {
         .expect("PANIC: Unable to read input file contents.");
     let mut module =
         hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file.");
-    let (def_uses, reverse_postorders, _typing, _doms, _postdoms, _fork_join_maps) =
+    let (def_uses, reverse_postorders, _typing, _subgraphs, _doms, _postdoms, _fork_join_maps) =
         hercules_ir::verify::verify(&mut module)
             .expect("PANIC: Failed to verify Hercules IR module.");
 
     let mut module = module.map(
         |(mut function, id), (types, mut constants, dynamic_constants)| {
-            hercules_ir::ccp::ccp(
+            hercules_opt::ccp::ccp(
                 &mut function,
                 &mut constants,
                 &def_uses[id.idx()],
                 &reverse_postorders[id.idx()],
             );
-            hercules_ir::dce::dce(&mut function);
+            hercules_opt::dce::dce(&mut function);
             function.delete_gravestones();
 
             let def_use = hercules_ir::def_use::def_use(&function);
-            hercules_ir::gvn::gvn(&mut function, &constants, &def_use);
-            hercules_ir::dce::dce(&mut function);
+            hercules_opt::gvn::gvn(&mut function, &constants, &def_use);
+            hercules_opt::dce::dce(&mut function);
             function.delete_gravestones();
 
             (function, (types, constants, dynamic_constants))
         },
     );
-    let (_def_use, _reverse_postorders, typing, doms, _postdoms, fork_join_maps) =
+    let (_def_uses, _reverse_postorders, typing, _subgraphs, doms, _postdoms, fork_join_maps) =
         hercules_ir::verify::verify(&mut module)
             .expect("PANIC: Failed to verify Hercules IR module.");