From c4ef507b5b83a128942c2a8007d1e38bcc1ef486 Mon Sep 17 00:00:00 2001
From: rarbore2 <rarbore2@illinois.edu>
Date: Mon, 6 May 2024 14:53:59 -0500
Subject: [PATCH] CPU partition respecting backend

---
 .gitignore                                    |    2 +-
 Cargo.lock                                    |  204 +++-
 Cargo.toml                                    |    3 +-
 hercules_cg/Cargo.toml                        |    2 +-
 hercules_cg/src/common.rs                     |  525 +++++++++
 hercules_cg/src/cpu.rs                        | 1001 +++++++++++++++++
 hercules_cg/src/cpu_beta.rs                   |    1 +
 hercules_cg/src/lib.rs                        |    8 +
 hercules_cg/src/top.rs                        |  135 +++
 hercules_ir/Cargo.toml                        |    3 +-
 hercules_ir/src/ir.rs                         |   54 +-
 hercules_ir/src/lib.rs                        |    2 +-
 hercules_ir/src/schedule.rs                   |   19 +-
 hercules_ir/src/subgraph.rs                   |    6 +
 hercules_opt/Cargo.toml                       |    3 +
 hercules_opt/src/pass.rs                      |   85 +-
 hercules_rt/Cargo.toml                        |    3 +
 hercules_rt/src/lib.rs                        |   37 +-
 hercules_rt/src/manifest.rs                   |   97 ++
 hercules_samples/matmul/src/main.rs           |   10 +-
 hercules_samples/sum_sample.hir               |    6 +-
 hercules_samples/task_parallel.hir            |   14 +
 .../Cargo.toml                                |    2 +-
 .../src/main.rs                               |    2 +-
 hercules_tools/hercules_driver/Cargo.toml     |   10 +
 hercules_tools/hercules_driver/src/main.rs    |   44 +
 26 files changed, 2186 insertions(+), 92 deletions(-)
 create mode 100644 hercules_cg/src/common.rs
 create mode 100644 hercules_cg/src/cpu.rs
 create mode 100644 hercules_cg/src/top.rs
 create mode 100644 hercules_rt/src/manifest.rs
 create mode 100644 hercules_samples/task_parallel.hir
 rename hercules_tools/{hercules_cpu => hercules_cpu_beta}/Cargo.toml (91%)
 rename hercules_tools/{hercules_cpu => hercules_cpu_beta}/src/main.rs (94%)
 create mode 100644 hercules_tools/hercules_driver/Cargo.toml
 create mode 100644 hercules_tools/hercules_driver/src/main.rs

diff --git a/.gitignore b/.gitignore
index 959fc7f6..278d4690 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,5 +5,5 @@
 *.ll
 *.c
 *.o
-
+*.hbin
 .*.swp
diff --git a/Cargo.lock b/Cargo.lock
index 1ec60bdb..c48af1ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
 
 [[package]]
 name = "aho-corasick"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
 dependencies = [
  "memchr",
 ]
@@ -61,15 +61,30 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.80"
+version = "1.0.81"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
+
+[[package]]
+name = "atomic-polyfill"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
+checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
+dependencies = [
+ "critical-section",
+]
 
 [[package]]
 name = "autocfg"
-version = "1.1.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
+
+[[package]]
+name = "base64"
+version = "0.21.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
 
 [[package]]
 name = "bincode"
@@ -86,6 +101,15 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "bitflags"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "bitvec"
 version = "1.0.1"
@@ -98,6 +122,12 @@ dependencies = [
  "wyz",
 ]
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "cactus"
 version = "1.0.7"
@@ -126,9 +156,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.2"
+version = "4.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651"
+checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -148,9 +178,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.0"
+version = "4.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47"
+checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -164,12 +194,24 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
 
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
 [[package]]
 name = "colorchoice"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
 
+[[package]]
+name = "critical-section"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216"
+
 [[package]]
 name = "deranged"
 version = "0.3.11"
@@ -180,13 +222,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "ena"
-version = "0.14.2"
+name = "embedded-io"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
-dependencies = [
- "log",
-]
+checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
 
 [[package]]
 name = "equivalent"
@@ -238,29 +277,52 @@ dependencies = [
  "wasi",
 ]
 
+[[package]]
+name = "hash32"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
+dependencies = [
+ "byteorder",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
 
+[[package]]
+name = "heapless"
+version = "0.7.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
+dependencies = [
+ "atomic-polyfill",
+ "hash32",
+ "rustc_version",
+ "serde",
+ "spin",
+ "stable_deref_trait",
+]
+
 [[package]]
 name = "heck"
-version = "0.4.1"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
 name = "hercules_cg"
 version = "0.1.0"
 dependencies = [
  "bitvec",
- "ena",
  "hercules_ir",
+ "hercules_rt",
 ]
 
 [[package]]
-name = "hercules_cpu"
+name = "hercules_cpu_beta"
 version = "0.1.0"
 dependencies = [
  "clap",
@@ -280,6 +342,16 @@ dependencies = [
  "rand",
 ]
 
+[[package]]
+name = "hercules_driver"
+version = "0.1.0"
+dependencies = [
+ "clap",
+ "hercules_ir",
+ "hercules_opt",
+ "ron",
+]
+
 [[package]]
 name = "hercules_ir"
 version = "0.1.0"
@@ -288,6 +360,7 @@ dependencies = [
  "nom",
  "ordered-float",
  "rand",
+ "serde",
 ]
 
 [[package]]
@@ -304,8 +377,11 @@ name = "hercules_opt"
 version = "0.1.0"
 dependencies = [
  "bitvec",
+ "hercules_cg",
  "hercules_ir",
  "ordered-float",
+ "postcard",
+ "serde",
  "take_mut",
 ]
 
@@ -313,14 +389,17 @@ dependencies = [
 name = "hercules_rt"
 version = "0.1.0"
 dependencies = [
+ "hercules_ir",
  "libc",
+ "postcard",
+ "serde",
 ]
 
 [[package]]
 name = "indexmap"
-version = "2.2.5"
+version = "2.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
 dependencies = [
  "equivalent",
  "hashbrown",
@@ -328,9 +407,9 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.10"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
 
 [[package]]
 name = "juno_frontend"
@@ -358,10 +437,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
 
 [[package]]
-name = "log"
-version = "0.4.21"
+name = "lock_api"
+version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
 
 [[package]]
 name = "lrlex"
@@ -482,6 +565,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "postcard"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8"
+dependencies = [
+ "cobs",
+ "embedded-io",
+ "heapless",
+ "serde",
+]
+
 [[package]]
 name = "powerfmt"
 version = "0.2.0"
@@ -496,9 +591,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
+checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
 dependencies = [
  "unicode-ident",
 ]
@@ -554,19 +649,19 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
 name = "regex"
-version = "1.10.3"
+version = "1.10.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
 dependencies = [
  "aho-corasick",
  "memchr",
  "regex-automata",
- "regex-syntax 0.8.2",
+ "regex-syntax 0.8.3",
 ]
 
 [[package]]
@@ -577,7 +672,7 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax 0.8.2",
+ "regex-syntax 0.8.3",
 ]
 
 [[package]]
@@ -588,9 +683,21 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
+
+[[package]]
+name = "ron"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94"
+dependencies = [
+ "base64",
+ "bitflags 2.5.0",
+ "serde",
+ "serde_derive",
+]
 
 [[package]]
 name = "rustc_version"
@@ -607,6 +714,12 @@ version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
 
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
 [[package]]
 name = "semver"
 version = "1.0.22"
@@ -645,6 +758,21 @@ dependencies = [
  "vob",
 ]
 
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
 [[package]]
 name = "static_assertions"
 version = "1.1.0"
@@ -659,9 +787,9 @@ checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
 
 [[package]]
 name = "syn"
-version = "2.0.52"
+version = "2.0.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
+checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/Cargo.toml b/Cargo.toml
index d625efb3..bded2dcf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,8 +6,9 @@ members = [
 	"hercules_opt",
 	"hercules_rt",
 
+	"hercules_tools/hercules_driver",
 	"hercules_tools/hercules_dot",
-	"hercules_tools/hercules_cpu",
+	"hercules_tools/hercules_cpu_beta",
 
   "juno_frontend",
 
diff --git a/hercules_cg/Cargo.toml b/hercules_cg/Cargo.toml
index 8a4cf940..b5479669 100644
--- a/hercules_cg/Cargo.toml
+++ b/hercules_cg/Cargo.toml
@@ -5,5 +5,5 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 
 [dependencies]
 bitvec = "*"
-ena = "*"
 hercules_ir = { path = "../hercules_ir" }
+hercules_rt = { path = "../hercules_rt" }
diff --git a/hercules_cg/src/common.rs b/hercules_cg/src/common.rs
new file mode 100644
index 00000000..9bc91b57
--- /dev/null
+++ b/hercules_cg/src/common.rs
@@ -0,0 +1,525 @@
+extern crate hercules_ir;
+extern crate hercules_rt;
+
+use std::collections::HashMap;
+
+use self::hercules_ir::*;
+use self::hercules_rt::manifest::*;
+
+/*
+ * Pretty much all of the codegen functions need to take in some large subset of
+ * IR structures, analysis results, and global pieces of information. Package
+ * them all in this struct, and make all the codegen functions members of this
+ * struct to cut down on the number of function arguments. This structure
+ * shouldn't be modified after creation.
+ */
+pub(crate) struct FunctionContext<'a> {
+    pub(crate) function: &'a Function,
+    pub(crate) types: &'a Vec<Type>,
+    pub(crate) constants: &'a Vec<Constant>,
+    pub(crate) dynamic_constants: &'a Vec<DynamicConstant>,
+    pub(crate) def_use: &'a ImmutableDefUseMap,
+    pub(crate) reverse_postorder: &'a Vec<NodeID>,
+    pub(crate) typing: &'a Vec<TypeID>,
+    pub(crate) control_subgraph: &'a Subgraph,
+    pub(crate) fork_join_map: &'a HashMap<NodeID, NodeID>,
+    pub(crate) fork_join_nest: &'a HashMap<NodeID, Vec<NodeID>>,
+    pub(crate) antideps: &'a Vec<(NodeID, NodeID)>,
+    pub(crate) bbs: &'a Vec<NodeID>,
+    pub(crate) plan: &'a Plan,
+    pub(crate) llvm_types: &'a Vec<String>,
+    pub(crate) llvm_constants: &'a Vec<String>,
+    pub(crate) llvm_dynamic_constants: &'a Vec<String>,
+    pub(crate) partitions_inverted_map: Vec<Vec<NodeID>>,
+}
+
+impl<'a> FunctionContext<'a> {
+    /*
+     * Find data inputs to a partition.
+     */
+    pub(crate) fn partition_data_inputs(&self, partition_id: PartitionID) -> Vec<NodeID> {
+        let partition = &self.partitions_inverted_map[partition_id.idx()];
+
+        let mut data_inputs: Vec<NodeID> = partition
+            .iter()
+            .map(|id| {
+                // For each node in the partition, keep the uses that are data
+                // nodes and are in a different partition.
+                get_uses(&self.function.nodes[id.idx()])
+                    .as_ref()
+                    .into_iter()
+                    .filter(|id| {
+                        // Filter out control nodes (just looking for data
+                        // inputs here), check that it's in another partition,
+                        // and ignore parameters, constants, and dynamic
+                        // constants (those are each passed to partition
+                        // functions using different mechanisms).
+                        !self.function.nodes[id.idx()].is_control()
+                            && self.plan.partitions[id.idx()] != partition_id
+                            && !self.function.nodes[id.idx()].is_parameter()
+                            && !self.function.nodes[id.idx()].is_constant()
+                            && !self.function.nodes[id.idx()].is_dynamic_constant()
+                    })
+                    .map(|x| *x)
+                    .collect::<Vec<NodeID>>()
+            })
+            // Collect all such uses across the whole partition.
+            .flatten()
+            .collect();
+
+        // Inputs and outputs of partitions need to be sorted so datums don't
+        // get mixed up.
+        data_inputs.sort();
+        data_inputs
+    }
+
+    /*
+     * Find data outputs of a partition.
+     */
+    pub(crate) fn partition_data_outputs(&self, partition_id: PartitionID) -> Vec<NodeID> {
+        let partition = &self.partitions_inverted_map[partition_id.idx()];
+
+        let mut data_outputs: Vec<NodeID> = partition
+            .iter()
+            .filter(|id| {
+                // For each data node in the partition, check if it has any uses
+                // outside its partition. Users can be control or data nodes.
+                // Also, don't add parameter, constant, and dynamic constant
+                // nodes. These nodes are passed to partition functions using
+                // different mechanisms.
+                !self.function.nodes[id.idx()].is_control()
+                    && !self.function.nodes[id.idx()].is_parameter()
+                    && !self.function.nodes[id.idx()].is_constant()
+                    && !self.function.nodes[id.idx()].is_dynamic_constant()
+                    && self
+                        .def_use
+                        .get_users(**id)
+                        .as_ref()
+                        .into_iter()
+                        .filter(|id| self.plan.partitions[id.idx()] != partition_id)
+                        .map(|x| *x)
+                        .count()
+                        > 0
+            })
+            .map(|x| *x)
+            // If this partition contains a return node, the data input of that
+            // node is a data output.
+            .chain(partition.iter().filter_map(|id| {
+                if let Node::Return { control: _, data } = self.function.nodes[id.idx()] {
+                    Some(data)
+                } else {
+                    None
+                }
+            }))
+            .collect();
+
+        // Inputs and outputs of partitions need to be sorted so datums don't
+        // get mixed up.
+        data_outputs.sort();
+        data_outputs
+    }
+
+    /*
+     * Find control nodes that will return from a partition.
+     */
+    pub(crate) fn partition_control_returns(&self, partition_id: PartitionID) -> Vec<NodeID> {
+        let partition = &self.partitions_inverted_map[partition_id.idx()];
+
+        partition
+            .iter()
+            .filter(|id| {
+                // For each control node in the partition, check if it has any
+                // users outside its partition. Users can be control nodes - if
+                // a user in a different partition is a data node, then the
+                // partition is malformed. Return nodes are also unconditionally
+                // a control return of this partition.
+                let outside_user_count = self
+                    .def_use
+                    .get_users(**id)
+                    .as_ref()
+                    .into_iter()
+                    .filter(|user_id| {
+                        // Users of control nodes can only be data nodes
+                        // if they are in the same partition as the
+                        // control node. Only control users may be in a
+                        // different partition.
+                        assert!(
+                            !self.function.nodes[id.idx()].is_control()
+                                || self.function.nodes[user_id.idx()].is_control()
+                                || self.plan.partitions[user_id.idx()] == partition_id
+                        );
+                        self.plan.partitions[user_id.idx()] != partition_id
+                    })
+                    .count();
+
+                // Just calculated for the below assert.
+                let control_user_count = self
+                    .def_use
+                    .get_users(**id)
+                    .as_ref()
+                    .into_iter()
+                    .filter(|id| self.function.nodes[id.idx()].is_control())
+                    .count();
+
+                // A control node cannot have users inside and outside its own
+                // partition. This is because a well-formedness condition of if
+                // and match nodes (the only control nodes allowed to have
+                // multiple users) is their read successors must be in the same
+                // partition as them.
+                assert!(
+                    !self.function.nodes[id.idx()].is_control()
+                        || outside_user_count == 0
+                        || outside_user_count == control_user_count
+                );
+                self.function.nodes[id.idx()].is_control()
+                    && (self.function.nodes[id.idx()].is_return() || outside_user_count > 0)
+            })
+            .map(|x| *x)
+            .collect()
+    }
+
+    /*
+     * Find control successors of a given partition. A partition cannot be a
+     * control successor of itself, since a self-cycle is represented as control
+     * flow within a partiion. In other words, the graph of control flow between
+     * partitions is free of self-loops (an edge connecting a partition to
+     * itself).
+     */
+    pub(crate) fn partition_control_successors(
+        &self,
+        partition_id: PartitionID,
+    ) -> Vec<PartitionID> {
+        let partition = &self.partitions_inverted_map[partition_id.idx()];
+
+        let mut partitions: Vec<PartitionID> = partition
+            .iter()
+            // Only consider successors of control nodes that land in other
+            // partitions; such successors are necessarily control nodes too.
+            .filter(|id| self.function.nodes[id.idx()].is_control())
+            .map(|id| {
+                // Get the partitions (that are not this partition) of successor
+                // nodes of control nodes.
+                self.def_use
+                    .get_users(*id)
+                    .as_ref()
+                    .into_iter()
+                    .map(|id| self.plan.partitions[id.idx()])
+                    .filter(|id| *id != partition_id)
+            })
+            // We want a flat list of all such partitions.
+            .flatten()
+            .collect();
+
+        // We only want one copy of the ID per partition. Sort first so that
+        // dedup removes all duplicates, not just adjacent ones.
+        partitions.sort_unstable_by_key(|id| id.idx());
+        partitions.dedup();
+        partitions
+    }
+
+    /*
+     * Calculate the reverse postorder of just this partition.
+     */
+    pub(crate) fn partition_reverse_postorder(&self, partition_id: PartitionID) -> Vec<NodeID> {
+        self.reverse_postorder
+            .iter()
+            .filter(|id| self.plan.partitions[id.idx()] == partition_id)
+            .map(|x| *x)
+            .collect()
+    }
+
+    /*
+     * Determine the array constant inputs to all partition functions. Get the
+     * constant IDs, and the array type IDs. Sort by constant ID for
+     * consistency.
+     */
+    pub(crate) fn partition_array_constant_inputs(&self) -> Vec<(ConstantID, TypeID)> {
+        let mut res = (0..self.constants.len())
+            .filter_map(|idx| {
+                self.constants[idx]
+                    .try_array_type(self.types)
+                    .map(|ty_id| (ConstantID::new(idx), ty_id))
+            })
+            .collect::<Vec<_>>();
+
+        res.sort();
+        res
+    }
+
+    /*
+     * Determine the dynamic constant inputs to all partition functions. Just
+     * assemble the dynamic constant IDs, since the type is always u64. Sort the
+     * parameters for consistency.
+     */
+    pub(crate) fn partition_dynamic_constant_inputs(&self) -> Vec<DynamicConstantID> {
+        let mut res = (0..self.dynamic_constants.len())
+            .filter_map(|idx| {
+                if self.dynamic_constants[idx].is_parameter() {
+                    Some(DynamicConstantID::new(idx))
+                } else {
+                    None
+                }
+            })
+            .collect::<Vec<_>>();
+
+        res.sort();
+        res
+    }
+}
+
+/*
+ * When emitting individual nodes in the partition codegen functions, a bunch of
+ * partition analysis results are needed. Package them all in this struct, and
+ * make all of the subroutines of the top level partition codegen functions
+ * members of this struct to cut down on the number of function arguments. This
+ * structure shouldn't be modified after creation. This structure only holds
+ * per-partition information - for example, global function parameters,
+ * constant parameters, and dynamic constant parameters are not stored, since
+ * those don't vary across partitions.
+ */
+pub(crate) struct PartitionContext<'a> {
+    pub(crate) function: &'a FunctionContext<'a>,
+    pub(crate) partition_id: PartitionID,
+    pub(crate) top_node: NodeID,
+    pub(crate) data_inputs: Vec<NodeID>,
+    pub(crate) data_outputs: Vec<NodeID>,
+    pub(crate) control_returns: Vec<NodeID>,
+    pub(crate) reverse_postorder: Vec<NodeID>,
+    pub(crate) partition_input_types: Vec<TypeID>,
+    pub(crate) return_type: Type,
+    pub(crate) manifest: PartitionManifest,
+}
+
+impl<'a> PartitionContext<'a> {
+    pub(crate) fn new(
+        function: &'a FunctionContext<'a>,
+        partition_id: PartitionID,
+        top_node: NodeID,
+    ) -> Self {
+        let data_inputs = function.partition_data_inputs(partition_id);
+        let data_outputs = function.partition_data_outputs(partition_id);
+        let control_returns = function.partition_control_returns(partition_id);
+        let control_successors = function.partition_control_successors(partition_id);
+        let reverse_postorder = function.partition_reverse_postorder(partition_id);
+
+        // The data input types are just the types of data nodes used by this
+        // partition, originating in another partition.
+        let partition_input_types = data_inputs
+            .iter()
+            .map(|id| function.typing[id.idx()])
+            .collect();
+
+        // The return struct contains all of the data outputs, plus control
+        // information if there are multiple successor partitions. The control
+        // information is used by the Hercules runtime to implement control flow
+        // between partitions.
+        let multiple_control_successors = control_successors.len() > 1;
+        let output_data_types = data_outputs.iter().map(|id| function.typing[id.idx()]);
+        let return_type = if multiple_control_successors {
+            let u64_ty_id = TypeID::new(
+                function
+                    .types
+                    .iter()
+                    .position(|ty| *ty == Type::UnsignedInteger64)
+                    .unwrap(),
+            );
+            Type::Product(
+                output_data_types
+                    .chain(std::iter::once(u64_ty_id))
+                    .collect(),
+            )
+        } else {
+            Type::Product(output_data_types.collect())
+        };
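+        // Illustrative example: with data outputs of types i32 and float and
+        // more than one control successor, the return type lowers to the LLVM
+        // struct "{i32, float, i64}", where the trailing i64 indicates which
+        // successor partition control should continue in.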
+
+        // Assemble the manifest.
+        let mut manifest = PartitionManifest::default();
+        manifest.top_node = top_node.idx() as u32;
+
+        // The first inputs are the data inputs, from other partitions.
+        manifest.inputs.extend(
+            data_inputs
+                .iter()
+                .map(|x| PartitionInput::DataInput(x.idx() as u32)),
+        );
+
+        // The next inputs are the function parameters, all in order.
+        manifest.inputs.extend(
+            (0..function.function.param_types.len())
+                .map(|x| PartitionInput::FunctionArgument(x as u32)),
+        );
+
+        // The next inputs are the array constants, all in order.
+        manifest.inputs.extend(
+            (0..(function
+                .constants
+                .iter()
+                .filter(|cons| cons.try_array_type(function.types).is_some())
+                .count()))
+                .map(|x| PartitionInput::ArrayConstant(x as u32)),
+        );
+
+        // The last inputs are the dynamic constants, all in order.
+        manifest.inputs.extend(
+            (0..function.function.num_dynamic_constants)
+                .map(|x| PartitionInput::DynamicConstant(x as u32)),
+        );
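+
+        // Putting it together (illustrative): a partition with two data inputs
+        // in a function with one parameter, one array constant, and one
+        // dynamic constant gets the inputs [DataInput, DataInput,
+        // FunctionArgument(0), ArrayConstant(0), DynamicConstant(0)], which
+        // matches the parameter order of the emitted partition function.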
+
+        // The outputs are the data outputs of this partition.
+        manifest.outputs.extend(
+            data_outputs
+                .iter()
+                .map(|x| PartitionOutput::DataOutput(x.idx() as u32)),
+        );
+
+        // If there are multiple control returns, also output the node being
+        // returned from.
+        if multiple_control_successors {
+            manifest.outputs.push(PartitionOutput::ControlIndicator);
+        }
+
+        PartitionContext {
+            function,
+            partition_id,
+            top_node,
+            data_inputs,
+            data_outputs,
+            control_returns,
+            reverse_postorder,
+            partition_input_types,
+            return_type,
+            manifest,
+        }
+    }
+}
+
+/*
+ * Types, constants, and dynamic constants are fairly simple to translate into
+ * LLVM IR.
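+ * For example (illustrative): a product of Integer32 and Float32 lowers to the
+ * LLVM struct type "{i32, float}", while any array type lowers to an opaque
+ * "ptr".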
+ */
+
+pub(crate) fn generate_type_string(ty: &Type, llvm_types: &Vec<String>) -> String {
+    match ty {
+        Type::Control(_) => {
+            // Later, we create virtual registers corresponding to fork nodes of
+            // type i64, so we need the "type" of the fork node to be i64.
+            "i64".to_string()
+        }
+        Type::Boolean => "i1".to_string(),
+        Type::Integer8 | Type::UnsignedInteger8 => "i8".to_string(),
+        Type::Integer16 | Type::UnsignedInteger16 => "i16".to_string(),
+        Type::Integer32 | Type::UnsignedInteger32 => "i32".to_string(),
+        Type::Integer64 | Type::UnsignedInteger64 => "i64".to_string(),
+        Type::Float32 => "float".to_string(),
+        Type::Float64 => "double".to_string(),
+        // Because we traverse in bottom-up order, we can assume that the LLVM
+        // types for child types are already computed.
+        Type::Product(fields) => {
+            let mut iter = fields.iter();
+            if let Some(first) = iter.next() {
+                iter.fold("{".to_string() + &llvm_types[first.idx()], |s, f| {
+                    s + ", " + &llvm_types[f.idx()]
+                }) + "}"
+            } else {
+                "{}".to_string()
+            }
+        }
+        Type::Array(_, _) => {
+            // Array types become pointers. The element type and dynamic
+            // constant bounds characterize the access code we generate later,
+            // not the type itself.
+            "ptr".to_string()
+        }
+        Type::Summation(_) => todo!(),
+    }
+}
+
+pub(crate) fn generate_type_strings(module: &Module) -> Vec<String> {
+    // Render types into LLVM IR. This requires translating from our interning
+    // structures to LLVM types. We can't just blow through the types vector,
+    // since a type may reference a type ID ahead of it in the vector. Instead,
+    // iterate types in a bottom up order with respect to the type intern DAGs.
+    let mut llvm_types = vec!["".to_string(); module.types.len()];
+    for id in module.types_bottom_up() {
+        llvm_types[id.idx()] = generate_type_string(&module.types[id.idx()], &llvm_types);
+    }
+
+    llvm_types
+}
+
+pub(crate) fn generate_constant_string(
+    cons_id: ConstantID,
+    cons: &Constant,
+    tys: &Vec<Type>,
+    llvm_constants: &Vec<String>,
+) -> String {
+    match cons {
+        Constant::Boolean(val) => {
+            if *val {
+                "true".to_string()
+            } else {
+                "false".to_string()
+            }
+        }
+        Constant::Integer8(val) => format!("{}", val),
+        Constant::Integer16(val) => format!("{}", val),
+        Constant::Integer32(val) => format!("{}", val),
+        Constant::Integer64(val) => format!("{}", val),
+        Constant::UnsignedInteger8(val) => format!("{}", val),
+        Constant::UnsignedInteger16(val) => format!("{}", val),
+        Constant::UnsignedInteger32(val) => format!("{}", val),
+        Constant::UnsignedInteger64(val) => format!("{}", val),
+        Constant::Float32(val) => {
+            if val.fract() == 0.0 {
+                format!("{}.0", val)
+            } else {
+                format!("{}", val)
+            }
+        }
+        Constant::Float64(val) => {
+            if val.fract() == 0.0 {
+                format!("{}.0", val)
+            } else {
+                format!("{}", val)
+            }
+        }
+        Constant::Product(_, _) | Constant::Summation(_, _, _) | Constant::Array(_, _) => {
+            format!("%cons.{}", cons_id.idx())
+        }
+        Constant::Zero(ty_id) => match tys[ty_id.idx()] {
+            Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => {
+                format!("%cons.{}", cons_id.idx())
+            }
+            _ => "zeroinitializer".to_string(),
+        },
+    }
+}
+
+pub(crate) fn generate_constant_strings(module: &Module) -> Vec<String> {
+    // Render constants into LLVM IR. This is done in a very similar manner as
+    // types.
+    let mut llvm_constants = vec!["".to_string(); module.constants.len()];
+    for id in module.constants_bottom_up() {
+        llvm_constants[id.idx()] = generate_constant_string(
+            id,
+            &module.constants[id.idx()],
+            &module.types,
+            &llvm_constants,
+        );
+    }
+
+    llvm_constants
+}
+
+pub(crate) fn generate_dynamic_constant_strings(module: &Module) -> Vec<String> {
+    // Render dynamic constants into LLVM IR.
+    let mut llvm_dynamic_constants = vec!["".to_string(); module.dynamic_constants.len()];
+    for id in (0..module.dynamic_constants.len()).map(DynamicConstantID::new) {
+        match &module.dynamic_constants[id.idx()] {
+            DynamicConstant::Constant(val) => llvm_dynamic_constants[id.idx()] = format!("{}", val),
+            DynamicConstant::Parameter(_) => {
+                llvm_dynamic_constants[id.idx()] = format!("%dyn_cons.{}", id.idx())
+            }
+        }
+    }
+
+    llvm_dynamic_constants
+}
diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs
new file mode 100644
index 00000000..4f4726a7
--- /dev/null
+++ b/hercules_cg/src/cpu.rs
@@ -0,0 +1,1001 @@
+extern crate bitvec;
+extern crate hercules_ir;
+extern crate hercules_rt;
+
+use std::collections::HashMap;
+use std::collections::VecDeque;
+
+use std::iter::zip;
+
+use std::fmt::Write;
+
+use self::bitvec::prelude::*;
+
+use self::hercules_ir::*;
+use self::hercules_rt::manifest::*;
+
+use crate::*;
+
+/*
+ * When assembling LLVM basic blocks, we traverse the nodes in a partition in an
+ * ad-hoc order. Thus, we cannot assume block terminators will be visited after
+ * data nodes, for example. However, textual LLVM IR requires that the
+ * terminator instruction is last. So, we emit nodes into separate strings of
+ * LLVM IR that will get stitched together when the block is complete.
+ */
+#[derive(Debug)]
+struct LLVMBlock {
+    header: String,
+    phis: String,
+    data: String,
+    terminator: String,
+}
+
+impl<'a> FunctionContext<'a> {
+    /*
+     * Top level function to generate code for a partition, targeting the CPU.
+     */
+    pub(crate) fn codegen_cpu_partition<W: Write>(
+        &self,
+        top_node: NodeID,
+        w: &mut W,
+    ) -> Result<PartitionManifest, std::fmt::Error> {
+        // Step 1: do some analysis to get a bunch of per-partition information.
+        let partition_id = self.plan.partitions[top_node.idx()];
+        let partition_context = PartitionContext::new(self, partition_id, top_node);
+
+        // Step 2: emit the function signature. The partition function
+        // parameters are the partition data inputs, the function parameters,
+        // the array constant pointers, and the dynamic constants.
+        let mut partition_function_parameters = partition_context
+            // The data inputs to this partition. These are the data values
+            // calculated in a different partition in the same function.
+            .partition_input_types
+            .iter()
+            .enumerate()
+            .map(|(idx, ty_id)| {
+                (
+                    self.llvm_types[ty_id.idx()].clone(),
+                    format!("%part_arg.{}", idx),
+                )
+            })
+            // The input types of the overall function.
+            .chain(
+                self.function
+                    .param_types
+                    .iter()
+                    .enumerate()
+                    .map(|(idx, ty_id)| {
+                        (
+                            self.llvm_types[ty_id.idx()].clone(),
+                            format!("%func_arg.{}", idx),
+                        )
+                    }),
+            )
+            // Array constants are passed in, pre-initialized.
+            .chain(
+                self.partition_array_constant_inputs()
+                    .into_iter()
+                    .map(|(id, ty_id)| {
+                        (
+                            self.llvm_types[ty_id.idx()].clone(),
+                            format!("%cons.{}", id.idx()),
+                        )
+                    }),
+            )
+            // Dynamic constants are passed in, since they are only known right
+            // before runtime.
+            .chain(
+                self.partition_dynamic_constant_inputs()
+                    .into_iter()
+                    .map(|id| ("i64".to_string(), format!("%dyn_cons.{}", id.idx()))),
+            );
+
+        write!(
+            w,
+            "define {} @{}_part_{}(",
+            generate_type_string(&partition_context.return_type, &self.llvm_types),
+            self.function.name,
+            partition_id.idx(),
+        )?;
+        let (first_ty, first_param) = partition_function_parameters.next().unwrap();
+        write!(w, "{} {}", first_ty, first_param)?;
+        for (ty, param) in partition_function_parameters {
+            write!(w, ", {} {}", ty, param)?;
+        }
+        write!(w, ") {{\n")?;
+
+        // Step 3: set up basic blocks. A node represents a basic block if its
+        // entry in the basic blocks vector points to itself.
+        let mut llvm_bbs = HashMap::new();
+        for id in &self.partitions_inverted_map[partition_id.idx()] {
+            if self.bbs[id.idx()] == *id {
+                llvm_bbs.insert(
+                    id,
+                    LLVMBlock {
+                        header: format!("bb_{}:\n", id.idx()),
+                        phis: "".to_string(),
+                        data: "".to_string(),
+                        terminator: "".to_string(),
+                    },
+                );
+            }
+        }
+
+        // Step 4: emit nodes. Nodes are emitted into basic blocks separately as
+        // nodes are not necessarily emitted in order. Assemble a worklist of
+        // nodes, initialized to reverse postorder. For non-phi and non-
+        // reduce nodes, only emit once all data uses are emitted. In addition,
+        // consider additional anti-dependence edges from read to write nodes.
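+        // For example (illustrative): a write node that anti-depends on a read
+        // of the same collection is pushed to the back of the worklist until
+        // that read node has been emitted, so the read's code is emitted before
+        // the write's.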
+        let mut visited = bitvec![u8, Lsb0; 0; self.function.nodes.len()];
+        let mut worklist = VecDeque::from(partition_context.reverse_postorder.clone());
+        while let Some(id) = worklist.pop_front() {
+            if !(self.function.nodes[id.idx()].is_phi()
+                || self.function.nodes[id.idx()].is_reduce())
+                && !get_uses(&self.function.nodes[id.idx()])
+                    .as_ref()
+                    .into_iter()
+                    // If this node isn't a phi or reduce, we need to check that
+                    // all uses, as well as all reads we anti-depend with, have
+                    // been emitted.
+                    .chain(self.antideps.iter().filter_map(|(read, write)| {
+                        if id == *write {
+                            Some(read)
+                        } else {
+                            None
+                        }
+                    }))
+                    // Only data dependencies inside this partition need to have
+                    // already been visited.
+                    .all(|id| {
+                        self.plan.partitions[id.idx()] != partition_id
+                            || self.function.nodes[id.idx()].is_control()
+                            || visited[id.idx()]
+                    })
+            {
+                // Skip emitting node if it's not a phi or reduce node and if
+                // its data uses are not emitted yet.
+                worklist.push_back(id);
+            } else {
+                // Once all of the data dependencies for this node are emitted,
+                // this node can be emitted. For reduce nodes specifically, we
+                // want to emit the phi in the fork's basic block, not the
+                // join's, so we handle that ugly case here. This is because
+                // there is a fundamental mismatch between Hercules' notion of
+                // reductions and LLVM's phi nodes. This is ok, since we can
+                // translate between the two. It's just a pain.
+                let bb = if let Node::Reduce {
+                    control,
+                    init: _,
+                    reduct: _,
+                } = self.function.nodes[id.idx()]
+                {
+                    // Figure out the fork corresponding to the associated join.
+                    let fork_id = if let Node::Join { control } = self.function.nodes[control.idx()]
+                    {
+                        if let Type::Control(factors) =
+                            &self.types[self.typing[control.idx()].idx()]
+                        {
+                            *factors.last().unwrap()
+                        } else {
+                            panic!("PANIC: Type of join node associated with reduce node is not a control type.")
+                        }
+                    } else {
+                        panic!("PANIC: Node associated with reduce node isn't a join node.")
+                    };
+
+                    // Emit in the basic block of the fork.
+                    llvm_bbs.get_mut(&self.bbs[fork_id.idx()]).unwrap()
+                } else {
+                    // In the normal case, emit in the basic block the node has
+                    // been actually assigned to.
+                    llvm_bbs.get_mut(&self.bbs[id.idx()]).unwrap()
+                };
+                partition_context.codegen_cpu_node(id, bb)?;
+                visited.set(id.idx(), true);
+            }
+        }
+
+        // Step 5: emit the now completed basic blocks, in order. Emit a dummy
+        // header block to unconditionally jump to the "top" basic block.
+        write!(w, "bb_header:\n  br label %bb_{}\n", top_node.idx())?;
+        for id in partition_context.reverse_postorder {
+            if self.bbs[id.idx()] == id {
+                write!(
+                    w,
+                    "{}{}{}{}",
+                    llvm_bbs[&id].header,
+                    llvm_bbs[&id].phis,
+                    llvm_bbs[&id].data,
+                    llvm_bbs[&id].terminator
+                )?;
+            }
+        }
+
+        // Step 6: close the partition function - we're done. The partition
+        // manifest is created by the partition context.
+        write!(w, "}}\n\n")?;
+        Ok(partition_context.manifest)
+    }
+}
+
+impl<'a> PartitionContext<'a> {
+    /*
+     * Emit LLVM IR implementing a single node.
+     */
+    fn codegen_cpu_node(&self, id: NodeID, bb: &mut LLVMBlock) -> std::fmt::Result {
+        // Helper to emit code to index a collection. All collections are
+        // pointers to some memory at the LLVM IR level. This memory is passed
+        // in as a parameter for anything involving arrays, and is alloca-ed for
+        // product and summation types.
+        let mut generate_index_code = |collect: NodeID, indices: &[Index]| -> std::fmt::Result {
+            // Step 1: calculate the list of collection types corresponding to
+            // each index.
+            let mut collection_ty_ids = vec![];
+            let mut curr_ty_id = self.function.typing[collect.idx()];
+            for index in indices {
+                match (index, &self.function.types[curr_ty_id.idx()]) {
+                    (Index::Field(idx), Type::Product(ty_ids))
+                    | (Index::Variant(idx), Type::Summation(ty_ids)) => {
+                        collection_ty_ids.push(curr_ty_id);
+                        curr_ty_id = ty_ids[*idx];
+                    }
+                    (Index::Position(_), Type::Array(elem_ty_id, _)) => {
+                        collection_ty_ids.push(curr_ty_id);
+                        curr_ty_id = *elem_ty_id;
+                    }
+                    _ => {
+                        panic!("PANIC: Found unsupported combination of index and collection type.")
+                    }
+                }
+            }
+            assert!(
+                self.function.types[curr_ty_id.idx()].is_primitive(),
+                "PANIC: Cannot generate partial indexing code."
+            );
+
+            // Step 2: calculate, as LLVM IR values, the stride and offset
+            // needed at each level of the collection. For products, the stride
+            // is calculated using a getelementptr hack (and is the size of the
+            // struct), and the offset corresponds to the field index (which is
+            // translated to an offset using another getelementptr hack). For
+            // arrays, the stride is the dynamic constant extent multiplied by
+            // the stride of the element type, and the offset is the position
+            // index multiplied by the stride of the element type. Additionally,
+            // emit code to add up all of the offsets to get a total offset into
+            // the collection. TODO: to support summations, and arrays in
+            // arbitrary places, we need to not use the hacky getelementptr
+            // technique, since LLVM IR can't represent arrays (in the Hercules
+            // sense) or summations as primitive types. Instead, we need to do
+            // collection memory layout entirely ourselves.
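+            //
+            // As an illustrative sketch, the code emitted below for the size
+            // of a product type lowered to "{i32, float}", and for the offset
+            // of its field 1, looks roughly like:
+            //   %stride.ptrhack = getelementptr {i32, float}, ptr null, i64 1
+            //   %stride = ptrtoint ptr %stride.ptrhack to i64
+            //   %offset.ptrhack = getelementptr {i32, float}, ptr null, i64 0, i64 1
+            //   %offset = ptrtoint ptr %offset.ptrhack to i64
+            // (register names shortened; the real names are %index<node>.<idx>.*).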
+            let elem_llvm_ty = &self.function.llvm_types[curr_ty_id.idx()];
+            write!(bb.data, "  %index{}.{}.total_offset = add i64 0, 0\n  %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n  %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n",
+                   id.idx(), indices.len(), id.idx(), indices.len(), elem_llvm_ty, id.idx(), indices.len(), id.idx(), indices.len()
+            )?;
+            for (idx, index) in indices.into_iter().enumerate().rev() {
+                match index {
+                    Index::Field(field) => {
+                        let product_llvm_ty =
+                            &self.function.llvm_types[collection_ty_ids[idx].idx()];
+                        write!(
+                            bb.data,
+                            "  %index{}.{}.stride.ptrhack = getelementptr {}, ptr null, i64 1\n  %index{}.{}.stride = ptrtoint ptr %index{}.{}.stride.ptrhack to i64\n  %index{}.{}.offset.ptrhack = getelementptr {}, ptr null, i64 0, i64 {}\n  %index{}.{}.offset = ptrtoint ptr %index{}.{}.offset.ptrhack to i64\n",
+                            id.idx(), idx,
+                            product_llvm_ty,
+                            id.idx(), idx,
+                            id.idx(), idx,
+                            id.idx(), idx,
+                            product_llvm_ty,
+                            field,
+                            id.idx(), idx,
+                            id.idx(), idx,
+                        )?;
+                    }
+                    Index::Variant(_) => todo!(),
+                    Index::Position(position) => {
+                        let array_extents = self.function.types[collection_ty_ids[idx].idx()]
+                            .try_extents()
+                            .unwrap();
+
+                        // TODO: calculate stride for arrays, needed for arrays
+                        // nested in other collections.
+                        write!(bb.data, "  %index{}.{}.offset.add.0 = add ", id.idx(), idx)?;
+                        self.cpu_emit_use_of_node(position[0], Some(id), true, &mut bb.data)?;
+                        write!(bb.data, ", {}\n", 0)?;
+                        for (dim_idx, (extent_dc_id, position_id)) in
+                            zip(array_extents, position.into_iter()).enumerate().skip(1)
+                        {
+                            write!(
+                                bb.data,
+                                "  %index{}.{}.offset.mul.{} = mul i64 {}, %index{}.{}.offset.add.{}\n",
+                                id.idx(), idx,
+                                dim_idx,
+                                self.function.llvm_dynamic_constants[extent_dc_id.idx()],
+                                id.idx(), idx,
+                                dim_idx - 1
+                            )?;
+                            write!(
+                                bb.data,
+                                "  %index{}.{}.offset.add.{} = add ",
+                                id.idx(),
+                                idx,
+                                dim_idx
+                            )?;
+                            self.cpu_emit_use_of_node(*position_id, Some(id), true, &mut bb.data)?;
+                            write!(
+                                bb.data,
+                                ", %index{}.{}.offset.mul.{}\n",
+                                id.idx(),
+                                idx,
+                                dim_idx
+                            )?;
+                        }
+                        write!(bb.data, "  %index{}.{}.offset = mul i64 %index{}.{}.stride, %index{}.{}.offset.add.{}\n", id.idx(), idx, id.idx(), idx + 1, id.idx(), idx, position.len() - 1)?;
+                    }
+                    Index::Control(_) => panic!(
+                        "PANIC: Found control index when generating collection indexing code."
+                    ),
+                }
+                write!(
+                    bb.data,
+                    "  %index{}.{}.total_offset = add i64 %index{}.{}.total_offset, %index{}.{}.offset\n",
+                    id.idx(), idx,
+                    id.idx(), idx + 1,
+                    id.idx(), idx
+                )?;
+            }
+
+            // Step 3: emit the getelementptr using the total collection offset.
+            write!(bb.data, "  %index{} = getelementptr i8, ", id.idx(),)?;
+            self.cpu_emit_use_of_node(collect, Some(id), true, &mut bb.data)?;
+            write!(bb.data, ", i64 %index{}.0.total_offset\n", id.idx())?;
+
+            Ok(())
+        };
+
+        // Helper to find the basic block corresponding to a particular control
+        // predecessor, for phi nodes. This is needed when a predecessor
+        // basic block is in a different partition. In this case, the phi's
+        // control predecessor is set to the top block of the partition.
+        let get_phi_predecessor = |pred_id: NodeID| {
+            if self.function.plan.partitions[pred_id.idx()] == self.partition_id {
+                format!("{}", self.function.bbs[pred_id.idx()].idx())
+            } else {
+                format!("header")
+            }
+        };
+
+        // Emit the primary IR for each node.
+        match self.function.function.nodes[id.idx()] {
+            Node::Start | Node::Region { preds: _ } => {
+                // Basic blocks containing a start or region node branch
+                // unconditionally to their single successor.
+                let successor = self
+                    .function
+                    .def_use
+                    .get_users(id)
+                    .iter()
+                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
+                    .next()
+                    .unwrap();
+                bb.terminator = format!("  br label %bb_{}\n", successor.idx());
+            }
+            Node::If { control: _, cond } => {
+                let successors = self.function.def_use.get_users(id);
+
+                // Determine the order of the successors (true/false or false/
+                // true) in the successors slice.
+                let rev = if let Node::Read {
+                    collect: _,
+                    indices,
+                } = &self.function.function.nodes[successors[0].idx()]
+                {
+                    indices[0] != Index::Control(0)
+                } else {
+                    panic!("PANIC: Successor of if node isn't a read node.")
+                };
+                bb.terminator = "  br ".to_string();
+                self.cpu_emit_use_of_node(cond, Some(id), true, &mut bb.terminator)?;
+                write!(
+                    bb.terminator,
+                    ", label %bb_{}, label %bb_{}\n",
+                    successors[(!rev) as usize].idx(),
+                    successors[rev as usize].idx()
+                )?;
+            }
+            Node::Fork { control, factor: _ } => {
+                // Calculate the join and successor.
+                let join = self.function.fork_join_map[&id];
+                let successor = self
+                    .function
+                    .def_use
+                    .get_users(id)
+                    .iter()
+                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
+                    .next()
+                    .unwrap();
+
+                // Create the phi node for the loop index. This is used directly
+                // by any thread ID user nodes. The control predecessor basic
+                // blocks are the control node preceding the fork and the
+                // corresponding join.
+                write!(bb.phis, "  ")?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?;
+                write!(
+                    bb.phis,
+                    " = phi i64 [ 0, %bb_{} ], [ %fork_inc{}, %bb_{} ]\n",
+                    get_phi_predecessor(self.function.bbs[control.idx()]),
+                    id.idx(),
+                    get_phi_predecessor(self.function.bbs[join.idx()]),
+                )?;
+
+                // Increment the loop index by one each iteration.
+                write!(bb.data, "  %fork_inc{} = add i64 1, ", id.idx())?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
+                write!(bb.data, "\n")?;
+
+                // Branch to the successor basic block.
+                write!(
+                    bb.terminator,
+                    "  br label %bb_{}\n",
+                    self.function.bbs[successor.idx()].idx()
+                )?;
+            }
+            Node::Join { control } => {
+            // Get the fork, its factor, and the successor to this join.
+                let fork_id = if let Type::Control(factors) =
+                    &self.function.types[self.function.typing[control.idx()].idx()]
+                {
+                    *factors.last().unwrap()
+                } else {
+                    panic!("PANIC: The type of a join node is incorrect.")
+                };
+                let factor = if let Node::Fork { control: _, factor } =
+                    &self.function.function.nodes[fork_id.idx()]
+                {
+                    *factor
+                } else {
+                    panic!("PANIC: The node referenced by the control type of a join node is not a fork.")
+                };
+                let successor = self
+                    .function
+                    .def_use
+                    .get_users(id)
+                    .iter()
+                    .filter(|id| self.function.function.nodes[id.idx()].is_strictly_control())
+                    .next()
+                    .unwrap();
+
+                // Form the bottom of the loop. Check if the loop is finished,
+                // and branch between the successor and the fork. The structure
+                // of this loop implies that fork-joins have to iterate at least
+                // once. Change the loop termination branch target if this is a
+                // control return (see comment below for more details).
+                let is_control_return = self.control_returns.contains(&id);
+                write!(
+                    bb.terminator,
+                    "  %join_cond{} = icmp ult i64 %fork_inc{}, {}\n",
+                    id.idx(),
+                    fork_id.idx(),
+                    self.function.llvm_dynamic_constants[factor.idx()]
+                )?;
+                write!(
+                    bb.terminator,
+                    "  br i1 %join_cond{}, label %bb_{}, label %bb_{}\n",
+                    id.idx(),
+                    self.function.bbs[fork_id.idx()].idx(),
+                    if is_control_return {
+                        format!("{}_join_cr", id.idx())
+                    } else {
+                        format!("{}", self.function.bbs[successor.idx()].idx())
+                    }
+                )?;
+
+                // Join nodes are the only nodes that can be a control return
+                // from a partition and generate a conditional branch. This
+                // means we have to do this really ugly hack where we insert
+                // another basic block to be the control return that we
+                // conditionally branch to. Other control nodes that may be
+                // control returns don't have this problem, because they always
+                // unconditionally branch to their destination. We add this LLVM
+                // IR text of a new basic block in the terminator of the current
+                // basic block, since we don't have mutable access here to the
+                // set of all LLVM basic blocks.
+                if is_control_return {
+                    write!(bb.terminator, "bb_{}_join_cr:\n", id.idx())?;
+                }
+            }
+            Node::Phi {
+                control: _,
+                ref data,
+            } => {
+                // For each predecessor of the associated region, we determine
+                // if that predecessor is in this partition or not. If so, then
+                // the predecessor control is just the basic block of the
+                // predecessor control node. If not, the predecessor control is
+                // the first basic block of the partition. The corresponding
+                // datum also needs to be provided as an argument to the partition,
+                // and this is handled by cpu_emit_use_of_node.
+                let pred_ids =
+                    get_uses(&self.function.function.nodes[self.function.bbs[id.idx()].idx()]);
+                let mut control_datum_pairs = zip(data.into_iter(), pred_ids.as_ref().iter())
+                    .map(|(datum, pred_id)| (*datum, get_phi_predecessor(*pred_id)));
+
+                // TODO: this code burns my eyes to look at, it might be worth
+                // making this not carcinogenic.
+                write!(bb.phis, "  ")?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.phis)?;
+                write!(
+                    bb.phis,
+                    " = phi {} [ ",
+                    self.function.llvm_types[self.function.typing[id.idx()].idx()]
+                )?;
+                let (first_data, first_control) = control_datum_pairs.next().unwrap();
+                self.cpu_emit_use_of_node(first_data, Some(id), false, &mut bb.phis)?;
+                write!(bb.phis, ", %bb_{} ]", first_control)?;
+                for (data, control) in control_datum_pairs {
+                    write!(bb.phis, ", [ ")?;
+                    self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.phis)?;
+                    write!(bb.phis, ", %bb_{} ]", control)?;
+                }
+                write!(bb.phis, "\n")?;
+            }
+            Node::ThreadID { control } => {
+                // Just bitcast the loop index from the fork. The bitcast is a
+                // no-op, but we add it to copy the value from the virtual
+                // register the fork generates to the virtual register
+                // corresponding to this thread ID node.
+                assert!(self.function.function.nodes[control.idx()].is_fork());
+                write!(bb.data, "  ")?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
+                write!(bb.data, " = bitcast i64 ",)?;
+                self.cpu_emit_use_of_node(control, Some(id), false, &mut bb.data)?;
+                write!(bb.data, " to i64\n",)?;
+            }
+            Node::Reduce {
+                control,
+                init,
+                reduct,
+            } => {
+                // Figure out the fork corresponding to the associated join.
+                let fork_id = if let Node::Join { control } =
+                    self.function.function.nodes[control.idx()]
+                {
+                    if let Type::Control(factors) =
+                        &self.function.types[self.function.typing[control.idx()].idx()]
+                    {
+                        *factors.last().unwrap()
+                    } else {
+                        panic!("PANIC: Type of join node associated with reduce node is not a control type.")
+                    }
+                } else {
+                    panic!("PANIC: Node associated with reduce node isn't a join node.")
+                };
+
+                // Figure out the fork's predecessor.
+                let pred = if let Node::Fork { control, factor: _ } =
+                    self.function.function.nodes[fork_id.idx()]
+                {
+                    control
+                } else {
+                    panic!("PANIC: Node referenced in type of join node associated with a reduce node is not a fork node.")
+                };
+
+                // Reduce nodes just lower to phi nodes. We already did the ugly
+                // hack so that "bb" refers to the basic block of the fork,
+                // rather than the join. So, now we just need to emit the phi.
+                write!(bb.phis, "  ")?;
+                self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.phis)?;
+                write!(
+                    bb.phis,
+                    " = phi {} [ ",
+                    self.function.llvm_types[self.function.typing[id.idx()].idx()]
+                )?;
+                self.cpu_emit_use_of_node(init, Some(id), false, &mut bb.phis)?;
+                write!(
+                    bb.phis,
+                    ", %bb_{} ], [ ",
+                    get_phi_predecessor(self.function.bbs[pred.idx()])
+                )?;
+                self.cpu_emit_use_of_node(reduct, Some(id), false, &mut bb.phis)?;
+                write!(
+                    bb.phis,
+                    ", %bb_{} ]\n",
+                    get_phi_predecessor(self.function.bbs[control.idx()])
+                )?;
+            }
+            // These nodes are handled by other mechanisms in the code lowering
+            // process.
+            Node::Return {
+                control: _,
+                data: _,
+            }
+            | Node::Parameter { index: _ }
+            | Node::Constant { id: _ }
+            | Node::DynamicConstant { id: _ } => {}
+            Node::Binary { left, right, op } => {
+                let op = match op {
+                    BinaryOperator::Add => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fadd"
+                        } else {
+                            "add"
+                        }
+                    }
+                    BinaryOperator::Sub => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fsub"
+                        } else {
+                            "sub"
+                        }
+                    }
+                    BinaryOperator::Mul => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fmul"
+                        } else {
+                            "mul"
+                        }
+                    }
+                    BinaryOperator::Div => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fdiv"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "udiv"
+                        } else {
+                            "sdiv"
+                        }
+                    }
+                    BinaryOperator::Rem => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "frem"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "urem"
+                        } else {
+                            "srem"
+                        }
+                    }
+                    BinaryOperator::LT => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp olt"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "icmp ult"
+                        } else {
+                            "icmp slt"
+                        }
+                    }
+                    BinaryOperator::LTE => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp ole"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "icmp ule"
+                        } else {
+                            "icmp sle"
+                        }
+                    }
+                    BinaryOperator::GT => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp ogt"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "icmp ugt"
+                        } else {
+                            "icmp sgt"
+                        }
+                    }
+                    BinaryOperator::GTE => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp oge"
+                        } else if self.function.types[self.function.typing[left.idx()].idx()]
+                            .is_unsigned()
+                        {
+                            "icmp uge"
+                        } else {
+                            "icmp sge"
+                        }
+                    }
+                    BinaryOperator::EQ => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp oeq"
+                        } else {
+                            "icmp eq"
+                        }
+                    }
+                    BinaryOperator::NE => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_float() {
+                            "fcmp one"
+                        } else {
+                            "icmp ne"
+                        }
+                    }
+                    BinaryOperator::Or => "or",
+                    BinaryOperator::And => "and",
+                    BinaryOperator::Xor => "xor",
+                    BinaryOperator::LSh => "shl",
+                    BinaryOperator::RSh => {
+                        if self.function.types[self.function.typing[left.idx()].idx()].is_unsigned()
+                        {
+                            "lshr"
+                        } else {
+                            "ashr"
+                        }
+                    }
+                };
+                write!(bb.data, "  ")?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
+                write!(bb.data, " = {} ", op)?;
+                self.cpu_emit_use_of_node(left, Some(id), true, &mut bb.data)?;
+                write!(bb.data, ", ")?;
+                self.cpu_emit_use_of_node(right, Some(id), false, &mut bb.data)?;
+                write!(bb.data, "\n")?;
+            }
+            Node::Read {
+                collect,
+                ref indices,
+            } => {
+                if self.function.function.nodes[collect.idx()].is_strictly_control() {
+                    // Read nodes may be projection successors of if or match
+                    // nodes.
+                    let successor = self.function.def_use.get_users(id)[0];
+                    write!(
+                        bb.terminator,
+                        "  br label %bb_{}\n",
+                        self.function.bbs[successor.idx()].idx()
+                    )?;
+                } else {
+                    generate_index_code(collect, indices)?;
+                    write!(bb.data, "  ")?;
+                    self.cpu_emit_use_of_node(id, Some(id), false, &mut bb.data)?;
+                    write!(
+                        bb.data,
+                        " = load {}, ptr %index{}\n",
+                        self.function.llvm_types[self.function.typing[id.idx()].idx()],
+                        id.idx(),
+                    )?;
+                }
+            }
+            Node::Write {
+                collect,
+                data,
+                ref indices,
+            } => {
+                generate_index_code(collect, indices)?;
+                write!(
+                    bb.data,
+                    "  store {} ",
+                    self.function.llvm_types[self.function.typing[data.idx()].idx()]
+                )?;
+                self.cpu_emit_use_of_node(data, Some(id), false, &mut bb.data)?;
+                write!(bb.data, ", ptr %index{}\n", id.idx())?;
+
+                // We can't just "copy" in LLVM IR, but we want to forward the
+                // pointer, unchanged, as the "output" of this write node. The
+                // easiest way to do this is to insert a useless bitcast.
+                write!(bb.data, "  ")?;
+                self.cpu_emit_use_of_node(id, None, false, &mut bb.data)?;
+                write!(bb.data, " = bitcast ptr ")?;
+                self.cpu_emit_use_of_node(collect, Some(id), false, &mut bb.data)?;
+                write!(bb.data, " to ptr\n")?;
+            }
+            _ => {
+                eprintln!("TO LOWER: {:?}", self.function.function.nodes[id.idx()]);
+            }
+        }
+
+        // If this node is a control return, we emit a return from this
+        // partition function.
+        if self.control_returns.contains(&id) {
+            // Get rid of the old terminator and replace it with a return. Don't do
+            // this if this node is a join node, since in that case we generate
+            // dedicated control return logic. See the join node codegen
+            // above for more details.
+            if !self.function.function.nodes[id.idx()].is_join() {
+                bb.terminator.clear();
+            }
+
+            // Making structs from the aggregated values in LLVM IR is a pain.
+            // We need to, one-by-one, insertvalue each element into the struct.
+            let ret_ty_str = generate_type_string(&self.return_type, &self.function.llvm_types);
+            for (idx, data_output_id) in self.data_outputs.iter().enumerate() {
+                write!(
+                    bb.terminator,
+                    "  %ret_agg{}.{} = insertvalue {} {}, ",
+                    id.idx(),
+                    idx,
+                    ret_ty_str,
+                    if idx == 0 {
+                        "undef".to_string()
+                    } else {
+                        format!("%ret_agg{}.{}", id.idx(), idx - 1)
+                    }
+                )?;
+                let mut data_output_id = *data_output_id;
+
+                // Handle reduce specially here. Technically, the "user" here is
+                // the join node, so cpu_emit_use_of_node would normally emit
+                // the reduce node's virtual register directly. However, if a
+                // data output is the result of a reduce node, that output is
+                // necessarily used outside the corresponding fork-join. Thus, we
+                // actually need to use the reduction use of the reduce node.
+                // This all only applies if the reduce node is in the current
+                // partition. If not, then use the reduce node as the argument
+                // to cpu_emit_use_of_node as normal, so that the partition
+                // function argument is properly used.
+                while let Node::Reduce {
+                    control: _,
+                    init: _,
+                    reduct,
+                } = self.function.function.nodes[data_output_id.idx()]
+                    && self.partition_id == self.function.plan.partitions[data_output_id.idx()]
+                {
+                    data_output_id = reduct;
+                }
+                self.cpu_emit_use_of_node(data_output_id, None, true, &mut bb.terminator)?;
+                write!(bb.terminator, ", {}\n", idx)?;
+            }
+
+            // Now, we can return the aggregate value we calculated.
+            if self.data_outputs.is_empty() && self.control_returns.len() == 1 {
+                // If there are no data outputs, just return the empty struct.
+                write!(bb.terminator, "  ret {} zeroinitializer\n", ret_ty_str)?;
+            } else if self.data_outputs.is_empty() {
+                // If there are multiple control returns, we need to return the
+                // node ID of the control return, so that the runtime can do
+                // control flow between partitions. In this case, there aren't
+                // any data outputs that also need to be returned.
+                write!(bb.terminator, "  %ret_agg{}.ctrl_pos = insertvalue {} undef, i64 {}, 0\n  ret {} %ret_agg{}.ctrl_pos\n",
+                       id.idx(),
+                       ret_ty_str,
+                       id.idx(),
+                       ret_ty_str,
+                       id.idx()
+                )?;
+            } else if self.control_returns.len() == 1 {
+                // In the normal case, we return the struct containing just the
+                // data outputs.
+                write!(
+                    bb.terminator,
+                    "  ret {} %ret_agg{}.{}\n",
+                    ret_ty_str,
+                    id.idx(),
+                    self.data_outputs.len() - 1,
+                )?;
+            } else {
+                // If there are multiple control returns from this partition and
+                // there are data outputs, we add the control return node ID to
+                // the return aggregate.
+                write!(
+                    bb.terminator,
+                    "  %ret_agg{}.ctrl_pos = insertvalue {} %ret_agg{}.{}, i64 {}, {}\n  ret {} %ret_agg{}.ctrl_pos\n",
+                    id.idx(),
+                    ret_ty_str,
+                    id.idx(),
+                    self.data_outputs.len() - 1,
+                    id.idx(),
+                    self.data_outputs.len(),
+                    ret_ty_str,
+                    id.idx(),
+                )?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /*
+     * Emit the LLVM value corresponding to a node. Optionally prefix with the
+     * LLVM type, which is required by textual LLVM IR in a few places.
+     * Optionally provide the node that will be using this emission. This is
+     * unused by all emitted node values except reduce nodes, which require the
+     * user argument to be given. We chose this interface because at the
+     * callsite of a cpu_emit_use_of_node, it is always known whether this thing
+     * being emitted could (or should) possibly be a reduce node. If not, then
+     * providing None gives a nice early panic when it is a reduce node, either
+     * because the developer misjudged or because there is a bug.
+     */
+    fn cpu_emit_use_of_node<W: Write>(
+        &self,
+        id: NodeID,
+        user: Option<NodeID>,
+        emit_type: bool,
+        w: &mut W,
+    ) -> std::fmt::Result {
+        // First, emit the type before the value (if applicable).
+        if emit_type {
+            write!(
+                w,
+                "{} ",
+                self.function.llvm_types[self.function.typing[id.idx()].idx()]
+            )?;
+        }
+
+        // Emitting the value can be surprisingly complicated, depending on what
+        // the node is. For example, partition arguments are emitted specially.
+        if let Some(input_idx) = self.data_inputs.iter().position(|inp_id| *inp_id == id) {
+            // If a use is in another partition, it needs to get passed to this
+            // partition's function as a parameter.
+            write!(w, "%part_arg.{}", input_idx)?;
+        } else {
+            match self.function.function.nodes[id.idx()] {
+                // Parameter nodes in this partition also represent parameters
+                // to this partition function.
+                Node::Parameter { index } => write!(w, "%func_arg.{}", index)?,
+                // Constants are pre-defined.
+                Node::Constant { id } => write!(w, "{}", self.function.llvm_constants[id.idx()])?,
+                Node::DynamicConstant { id } => {
+                    write!(w, "{}", self.function.llvm_dynamic_constants[id.idx()])?
+                }
+                // Reduce nodes, as usual, are not nice to handle. We need to
+                // emit different LLVM depending on whether the user is inside
+                // or outside the reduce's corresponding fork-join nest. Inside,
+                // we emit as usual, since the user needs to use the phi node
+                // inside the reduction loop. Outside, we need to use the reduct
+                // use of the reduce node, so that we don't grab the reduction
+                // variable one loop iteration too early.
+                Node::Reduce {
+                    control,
+                    init: _,
+                    reduct,
+                } => {
+                    // Figure out the fork corresponding to the associated join.
+                    let fork_id = if let Node::Join { control } =
+                        self.function.function.nodes[control.idx()]
+                    {
+                        if let Type::Control(factors) =
+                            &self.function.types[self.function.typing[control.idx()].idx()]
+                        {
+                            *factors.last().unwrap()
+                        } else {
+                            panic!()
+                        }
+                    } else {
+                        panic!()
+                    };
+
+                    // Check if the basic block containing the user node is in
+                    // the fork-join nest for this reduce node. We make the user
+                    // node an optional argument as a debugging tool - if we
+                    // exercise this code branch when generating the code for a
+                    // node that absolutely should not be using the result of a
+                    // reduce node, we would like to know!
+                    if self.function.fork_join_nest[&self.function.bbs[user.expect("PANIC: cpu_emit_use_of_node was called on a reduce node, but no user node ID was given.").idx()]]
+                        .contains(&fork_id)
+                    {
+                        // If the user is inside the fork-join nest, then emit
+                        // the reduce node directly.
+                        assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]);
+                        write!(w, "%virt.{}", id.idx())?;
+                    } else {
+                        // If the user is outside the fork-join nest, then
+                        // recursively emit on the reduction input to the reduce
+                        // node. This is needed when there is a reduce chain.
+                        assert_eq!(
+                            self.partition_id,
+                            self.function.plan.partitions[reduct.idx()]
+                        );
+                        self.cpu_emit_use_of_node(reduct, user, emit_type, w)?;
+                    }
+                }
+                // Uses that are in this partition are just virtual registers.
+                // Clang is really annoying about numbering virtual registers,
+                // so to avoid that silliness we prepend all our virtual
+                // registers with a prefix indicating what kind of thing it is.
+                // For normal values, we use "virt" for virtual register.
+                _ => {
+                    assert_eq!(self.partition_id, self.function.plan.partitions[id.idx()]);
+                    write!(w, "%virt.{}", id.idx())?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
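
For orientation, the fork and join emitters above produce a loop of roughly the
following shape for a single fork-join with an empty body. This is a
hand-assembled sketch, not captured compiler output: the node IDs (fork = 1,
join = 2), the surrounding block numbers, and the trip count of 16 are
invented, but the bb_N / %virt.N / %fork_incN / %join_condN names follow the
format strings above.

    // Sketch only: approximate LLVM IR for one fork-join, under the naming
    // assumptions stated above. Real output depends on the partition's basic
    // block numbering and on the rendered dynamic constant factor.
    const FORK_JOIN_SKETCH: &str = r#"
    bb_1:                                             ; fork
      %virt.1 = phi i64 [ 0, %bb_0 ], [ %fork_inc1, %bb_2 ]
      %fork_inc1 = add i64 1, %virt.1
      br label %bb_2
    bb_2:                                             ; join
      %join_cond2 = icmp ult i64 %fork_inc1, 16
      br i1 %join_cond2, label %bb_1, label %bb_3
    "#;
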
diff --git a/hercules_cg/src/cpu_beta.rs b/hercules_cg/src/cpu_beta.rs
index e3974111..1a563143 100644
--- a/hercules_cg/src/cpu_beta.rs
+++ b/hercules_cg/src/cpu_beta.rs
@@ -3,6 +3,7 @@ extern crate hercules_ir;
 
 use std::collections::HashMap;
 use std::collections::VecDeque;
+
 use std::fmt::Write;
 use std::iter::zip;
 
diff --git a/hercules_cg/src/lib.rs b/hercules_cg/src/lib.rs
index 2d1293de..280910dd 100644
--- a/hercules_cg/src/lib.rs
+++ b/hercules_cg/src/lib.rs
@@ -1,3 +1,11 @@
+#![feature(let_chains)]
+
+pub mod common;
+pub mod cpu;
 pub mod cpu_beta;
+pub mod top;
 
+pub use crate::common::*;
+pub use crate::cpu::*;
 pub use crate::cpu_beta::*;
+pub use crate::top::*;
diff --git a/hercules_cg/src/top.rs b/hercules_cg/src/top.rs
new file mode 100644
index 00000000..b992344d
--- /dev/null
+++ b/hercules_cg/src/top.rs
@@ -0,0 +1,135 @@
+extern crate hercules_ir;
+extern crate hercules_rt;
+
+use std::collections::HashMap;
+use std::fmt::Write;
+
+use self::hercules_ir::*;
+use self::hercules_rt::manifest::*;
+
+use crate::*;
+
+/*
+ * Top level function to generate code for a module. Emits LLVM IR text. Calls
+ * out to backends to generate code for individual partitions. Creates a
+ * manifest describing the generated code.
+ */
+pub fn codegen<W: Write>(
+    module: &Module,
+    def_uses: &Vec<ImmutableDefUseMap>,
+    reverse_postorders: &Vec<Vec<NodeID>>,
+    typing: &ModuleTyping,
+    control_subgraphs: &Vec<Subgraph>,
+    fork_join_maps: &Vec<HashMap<NodeID, NodeID>>,
+    fork_join_nests: &Vec<HashMap<NodeID, Vec<NodeID>>>,
+    antideps: &Vec<Vec<(NodeID, NodeID)>>,
+    bbs: &Vec<Vec<NodeID>>,
+    plans: &Vec<Plan>,
+    w: &mut W,
+) -> Result<ModuleManifest, std::fmt::Error> {
+    // Render types, constants, and dynamic constants into LLVM IR.
+    let llvm_types = generate_type_strings(module);
+    let llvm_constants = generate_constant_strings(module);
+    let llvm_dynamic_constants = generate_dynamic_constant_strings(module);
+
+    // Generate a dummy uninitialized global - this is needed so that there'll
+    // be a non-empty .bss section in the ELF object file.
+    write!(w, "@dummy = dso_local global i32 0, align 4\n")?;
+
+    // Do codegen for each function individually. Get each function's manifest.
+    let mut manifests = vec![];
+    for function_idx in 0..module.functions.len() {
+        // There's a bunch of per-function information we use.
+        let context = FunctionContext {
+            function: &module.functions[function_idx],
+            types: &module.types,
+            constants: &module.constants,
+            dynamic_constants: &module.dynamic_constants,
+            def_use: &def_uses[function_idx],
+            reverse_postorder: &reverse_postorders[function_idx],
+            typing: &typing[function_idx],
+            control_subgraph: &control_subgraphs[function_idx],
+            fork_join_map: &fork_join_maps[function_idx],
+            fork_join_nest: &fork_join_nests[function_idx],
+            antideps: &antideps[function_idx],
+            bbs: &bbs[function_idx],
+            plan: &plans[function_idx],
+            llvm_types: &llvm_types,
+            llvm_constants: &llvm_constants,
+            llvm_dynamic_constants: &llvm_dynamic_constants,
+            partitions_inverted_map: plans[function_idx].invert_partition_map(),
+        };
+
+        manifests.push(context.codegen_function(w)?);
+    }
+
+    // Assemble the manifest for the whole module.
+    Ok(ModuleManifest {
+        functions: manifests,
+        types: module.types.clone(),
+        // TODO: populate array constants.
+        array_constants: vec![],
+    })
+}
+
+impl<'a> FunctionContext<'a> {
+    /*
+     * Each function gets codegened separately.
+     */
+    fn codegen_function<W: Write>(&self, w: &mut W) -> Result<FunctionManifest, std::fmt::Error> {
+        // Find the "top" control node of each partition. One well-formedness
+        // condition of partitions is that there is exactly one "top" control
+        // node.
+        let top_nodes: Vec<NodeID> = self
+            .partitions_inverted_map
+            .iter()
+            .enumerate()
+            .map(|(part_idx, part)| {
+                // For each partition, find the "top" node.
+                *part
+                    .iter()
+                    .filter(move |id| {
+                        // The "top" node is a control node having at least one
+                        // control predecessor in another partition, or is a
+                        // start node. Every predecessor in the control subgraph
+                        // is a control node.
+                        self.function.nodes[id.idx()].is_start()
+                            || (self.function.nodes[id.idx()].is_control()
+                                && self
+                                    .control_subgraph
+                                    .preds(**id)
+                                    .filter(|pred_id| {
+                                        self.plan.partitions[pred_id.idx()].idx() != part_idx
+                                    })
+                                    .count()
+                                    > 0)
+                    })
+                    .next()
+                    .unwrap()
+            })
+            .collect();
+
+        // Generate code for each individual partition. This generates a single
+        // LLVM function per partition. These functions will be called in async
+        // tasks by the Hercules runtime.
+        assert_eq!(self.plan.num_partitions, top_nodes.len());
+        let mut manifests = vec![];
+        for part_idx in 0..self.plan.num_partitions {
+            match self.plan.partition_devices[part_idx] {
+                Device::CPU => manifests.push(self.codegen_cpu_partition(top_nodes[part_idx], w)?),
+                Device::GPU => todo!(),
+            }
+        }
+
+        // Assemble the manifest for the whole function.
+        Ok(FunctionManifest {
+            name: self.function.name.clone(),
+            param_types: self.function.param_types.clone(),
+            typing: self.typing.clone(),
+            num_dynamic_constant_parameters: self.function.num_dynamic_constants,
+            partitions: manifests,
+            // TODO: populate dynamic constant rules.
+            dynamic_constant_rules: vec![],
+        })
+    }
+}
diff --git a/hercules_ir/Cargo.toml b/hercules_ir/Cargo.toml
index 39fbebe5..b99c0877 100644
--- a/hercules_ir/Cargo.toml
+++ b/hercules_ir/Cargo.toml
@@ -7,4 +7,5 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>, Aaron Councilman <aaronjc4@il
 rand = "*"
 nom = "*"
 ordered-float = "*"
-bitvec = "*"
\ No newline at end of file
+bitvec = "*"
+serde = { version = "*", features = ["derive"] }
\ No newline at end of file
diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs
index 0688c137..075d10c1 100644
--- a/hercules_ir/src/ir.rs
+++ b/hercules_ir/src/ir.rs
@@ -1,5 +1,6 @@
 extern crate bitvec;
 extern crate ordered_float;
+extern crate serde;
 
 use std::fmt::Write;
 use std::ops::Coroutine;
@@ -7,6 +8,8 @@ use std::ops::CoroutineState;
 use std::pin::Pin;
 
 use self::bitvec::prelude::*;
+use self::serde::Deserialize;
+use self::serde::Serialize;
 
 use crate::*;
 
@@ -52,7 +55,7 @@ pub struct Function {
  * parallelism. Summation types are an IR equivalent of Rust's enum types.
  * These are lowered into tagged unions during scheduling.
  */
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub enum Type {
     Control(Box<[NodeID]>),
     Boolean,
@@ -379,7 +382,7 @@ impl Module {
         let mut stack = (0..self.types.len())
             .map(TypeID::new)
             .collect::<Vec<TypeID>>();
-        let coroutine = move || {
+        let coroutine = #[coroutine] move || {
             // Since this is a coroutine, handle recursion manually.
             while let Some(id) = stack.pop() {
                 if visited[id.idx()] {
@@ -438,7 +441,7 @@ impl Module {
         let mut stack = (0..self.constants.len())
             .map(ConstantID::new)
             .collect::<Vec<ConstantID>>();
-        let coroutine = move || {
+        let coroutine = #[coroutine] move || {
             // Since this is a coroutine, handle recursion manually.
             while let Some(id) = stack.pop() {
                 if visited[id.idx()] {
@@ -593,7 +596,8 @@ impl<T: Clone> GraveUpdatable for Vec<T> {
         for (data, (idx, mapping)) in
             std::iter::zip(self.into_iter(), grave_mapping.iter().enumerate())
         {
-            if idx != 0 && mapping.idx() == 0 {
+            if idx == 0 || mapping.idx() != 0 {
+                assert_eq!(new_self.len(), mapping.idx());
                 new_self.push(data.clone());
             }
         }
@@ -695,6 +699,22 @@ impl Constant {
         }
     }
 
+    pub fn try_array_type(&self, types: &[Type]) -> Option<TypeID> {
+        // Need types, since a zero initializer may be for a collection type or
+        // not.
+        match self {
+            Constant::Array(ty, _) => Some(*ty),
+            Constant::Zero(ty) => {
+                if types[ty.idx()].is_primitive() {
+                    None
+                } else {
+                    Some(*ty)
+                }
+            }
+            _ => None,
+        }
+    }
+
     /*
      * Useful for GVN.
      */
@@ -732,6 +752,16 @@ impl Constant {
     }
 }
 
+impl DynamicConstant {
+    pub fn is_parameter(&self) -> bool {
+        if let DynamicConstant::Parameter(_) = self {
+            true
+        } else {
+            false
+        }
+    }
+}
+
 /*
  * Simple predicate functions on nodes take a lot of space, so use a macro.
  */
@@ -825,6 +855,9 @@ impl Node {
             data: _,
         }
     );
+    define_pattern_predicate!(is_parameter, Node::Parameter { index: _ });
+    define_pattern_predicate!(is_constant, Node::Constant { id: _ });
+    define_pattern_predicate!(is_dynamic_constant, Node::DynamicConstant { id: _ });
     define_pattern_predicate!(
         is_read,
         Node::Read {
@@ -1148,7 +1181,18 @@ impl TernaryOperator {
 #[macro_export]
 macro_rules! define_id_type {
     ($x: ident) => {
-        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+        #[derive(
+            Debug,
+            Clone,
+            Copy,
+            PartialEq,
+            Eq,
+            Hash,
+            PartialOrd,
+            Ord,
+            serde::Serialize,
+            serde::Deserialize,
+        )]
         pub struct $x(u32);
 
         impl $x {
diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs
index 9da0276d..44252678 100644
--- a/hercules_ir/src/lib.rs
+++ b/hercules_ir/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(coroutines, coroutine_trait, let_chains)]
+#![feature(coroutines, coroutine_trait, let_chains, stmt_expr_attributes)]
 
 pub mod antideps;
 pub mod build;
diff --git a/hercules_ir/src/schedule.rs b/hercules_ir/src/schedule.rs
index 9881c3be..f8c35276 100644
--- a/hercules_ir/src/schedule.rs
+++ b/hercules_ir/src/schedule.rs
@@ -189,7 +189,7 @@ pub fn default_plan(
         schedules: vec![vec![]; function.nodes.len()],
         partitions: vec![PartitionID::new(0); function.nodes.len()],
         partition_devices: vec![Device::CPU; 1],
-        num_partitions: 0,
+        num_partitions: 1,
     };
 
     // Infer schedules.
@@ -198,7 +198,8 @@ pub fn default_plan(
 
     // Infer a partitioning.
     partition_out_forks(function, reverse_postorder, fork_join_map, bbs, &mut plan);
-    place_fork_partitions_on_gpu(function, &mut plan);
+    // TODO: uncomment once GPU backend is implemented.
+    // place_fork_partitions_on_gpu(function, &mut plan);
 
     plan
 }
@@ -349,13 +350,15 @@ pub fn partition_out_forks(
         reverse_postorder,
         |inputs: &[&NodeID], node_id: NodeID| match function.nodes[node_id.idx()] {
             Node::Start => NodeID::new(0),
-            Node::Fork {
-                control: _,
-                factor: _,
-            } => {
+            Node::Fork { control, factor: _ } => {
                 // Start a partition if the preceding partition isn't a fork
-                // partition. Otherwise, be part of the parent fork partition.
-                if *inputs[0] != NodeID::top() && function.nodes[inputs[0].idx()].is_fork() {
+                // partition and the predecessor isn't the join for the
+                // predecessor fork partition. Otherwise, be part of the parent
+                // fork partition.
+                if *inputs[0] != NodeID::top()
+                    && function.nodes[inputs[0].idx()].is_fork()
+                    && fork_join_map.get(&inputs[0]) != Some(&control)
+                {
                     inputs[0].clone()
                 } else {
                     node_id
diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs
index c0b7aa4b..6d76f6fc 100644
--- a/hercules_ir/src/subgraph.rs
+++ b/hercules_ir/src/subgraph.rs
@@ -37,6 +37,12 @@ impl<'a> Iterator for SubgraphIterator<'a> {
     }
 }
 
+impl<'a> ExactSizeIterator for SubgraphIterator<'a> {
+    fn len(&self) -> usize {
+        self.edges.len()
+    }
+}
+
 impl IntoIterator for Subgraph {
     type Item = NodeID;
     type IntoIter = std::vec::IntoIter<Self::Item>;
diff --git a/hercules_opt/Cargo.toml b/hercules_opt/Cargo.toml
index 5cf76b09..6da77f44 100644
--- a/hercules_opt/Cargo.toml
+++ b/hercules_opt/Cargo.toml
@@ -7,4 +7,7 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>, Aaron Councilman <aaronjc4@il
 ordered-float = "*"
 bitvec = "*"
 take_mut = "*"
+postcard = { version = "*", features = ["alloc"] }
+serde = { version = "*", features = ["derive"] }
 hercules_ir = { path = "../hercules_ir" }
+hercules_cg = { path = "../hercules_cg" }
diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs
index 394de3c0..ef79b36e 100644
--- a/hercules_opt/src/pass.rs
+++ b/hercules_opt/src/pass.rs
@@ -1,28 +1,26 @@
+extern crate hercules_cg;
 extern crate hercules_ir;
+extern crate postcard;
+extern crate serde;
 extern crate take_mut;
 
 use std::collections::HashMap;
+use std::fs::File;
+use std::io::prelude::*;
 use std::iter::zip;
+use std::process::*;
 
-use self::hercules_ir::antideps::*;
-use self::hercules_ir::dataflow::*;
-use self::hercules_ir::def_use::*;
-use self::hercules_ir::dom::*;
-use self::hercules_ir::dot::*;
-use self::hercules_ir::gcm::*;
-use self::hercules_ir::ir::*;
-use self::hercules_ir::loops::*;
-use self::hercules_ir::schedule::*;
-use self::hercules_ir::subgraph::*;
-use self::hercules_ir::typecheck::*;
-use self::hercules_ir::verify::*;
+use self::serde::Deserialize;
+
+use self::hercules_cg::*;
+use self::hercules_ir::*;
 
 use crate::*;
 
 /*
  * Passes that can be run on a module.
  */
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Deserialize)]
 pub enum Pass {
     DCE,
     CCP,
@@ -31,7 +29,11 @@ pub enum Pass {
     PhiElim,
     Predication,
     Verify,
+    // Parameterized over whether analyses that aid visualization are necessary.
+    // Useful to set to false if displaying a potentially broken module.
     Xdot(bool),
+    // Parameterized by output file name.
+    Codegen(String),
 }
 
 /*
@@ -221,7 +223,7 @@ impl PassManager {
     }
 
     pub fn make_bbs(&mut self) {
-        if self.antideps.is_none() {
+        if self.bbs.is_none() {
             self.make_def_uses();
             self.make_reverse_postorders();
             self.make_doms();
@@ -390,6 +392,61 @@ impl PassManager {
                     // Xdot doesn't require clearing analysis results.
                     continue;
                 }
+                Pass::Codegen(output_file_name) => {
+                    self.make_def_uses();
+                    self.make_reverse_postorders();
+                    self.make_typing();
+                    self.make_control_subgraphs();
+                    self.make_fork_join_maps();
+                    self.make_fork_join_nests();
+                    self.make_antideps();
+                    self.make_bbs();
+                    self.make_plans();
+
+                    let mut llvm_ir = String::new();
+                    let manifest = codegen(
+                        &self.module,
+                        self.def_uses.as_ref().unwrap(),
+                        self.reverse_postorders.as_ref().unwrap(),
+                        self.typing.as_ref().unwrap(),
+                        self.control_subgraphs.as_ref().unwrap(),
+                        self.fork_join_maps.as_ref().unwrap(),
+                        self.fork_join_nests.as_ref().unwrap(),
+                        self.antideps.as_ref().unwrap(),
+                        self.bbs.as_ref().unwrap(),
+                        self.plans.as_ref().unwrap(),
+                        &mut llvm_ir,
+                    )
+                    .unwrap();
+
+                    // Compile LLVM IR into ELF object.
+                    let llc_process = Command::new("llc")
+                        .arg("-filetype=obj")
+                        .arg("-O3")
+                        .stdin(Stdio::piped())
+                        .stdout(Stdio::piped())
+                        .spawn()
+                        .unwrap();
+                    llc_process
+                        .stdin
+                        .as_ref()
+                        .unwrap()
+                        .write_all(llvm_ir.as_bytes())
+                        .unwrap();
+                    let elf_object = llc_process.wait_with_output().unwrap().stdout;
+
+                    // Package manifest and ELF object into the same file.
+                    let hbin_module = (manifest, elf_object);
+                    let hbin_contents: Vec<u8> = postcard::to_allocvec(&hbin_module).unwrap();
+
+                    let mut file =
+                        File::create(output_file_name).expect("PANIC: Unable to open output file.");
+                    file.write_all(&hbin_contents)
+                        .expect("PANIC: Unable to write output file contents.");
+
+                    // Codegen doesn't require clearing analysis results.
+                    continue;
+                }
             }
 
             // Cleanup the module after passes. Delete gravestone nodes. Repair
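
As the Codegen arm above and hercules_rt::load_binary below both show, a .hbin
file is simply the postcard encoding of a (ModuleManifest, Vec<u8>) pair, with
the second element holding the raw ELF object emitted by llc. A minimal sketch
of decoding one outside the runtime (the helper name is ours, not part of this
patch):

    extern crate hercules_rt;
    extern crate postcard;

    use hercules_rt::manifest::ModuleManifest;

    // Sketch: a .hbin is a postcard-serialized (manifest, ELF bytes) tuple.
    fn unpack_hbin(buffer: &[u8]) -> (ModuleManifest, Vec<u8>) {
        postcard::from_bytes(buffer).expect("PANIC: Malformed .hbin contents.")
    }
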
diff --git a/hercules_rt/Cargo.toml b/hercules_rt/Cargo.toml
index 500265c4..3df92d6a 100644
--- a/hercules_rt/Cargo.toml
+++ b/hercules_rt/Cargo.toml
@@ -5,3 +5,6 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 
 [dependencies]
 libc = "*"
+postcard = { version = "*", features = ["alloc"] }
+serde = { version = "*", features = ["derive"] }
+hercules_ir = { path = "../hercules_ir" }
diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs
index f04c6c6a..2d19454d 100644
--- a/hercules_rt/src/lib.rs
+++ b/hercules_rt/src/lib.rs
@@ -1,27 +1,34 @@
+extern crate postcard;
+
 use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;
 
 pub(crate) mod elf;
+pub mod manifest;
 pub(crate) use crate::elf::*;
+pub(crate) use crate::manifest::*;
 
 #[derive(Debug)]
 pub struct Module {
+    manifest: ModuleManifest,
     elf: Elf,
 }
 
 impl Module {
-    pub fn get_function_ptr(&self, name: &str) -> *mut u8 {
-        unsafe {
-            self.elf.program_section.offset(
-                self.elf.function_pointers[self
-                    .elf
-                    .function_names
-                    .iter()
-                    .position(|s| s == name)
-                    .unwrap()],
-            )
-        }
+    /*
+     * Get the function pointer corresponding to a function name. Panic if not
+     * found.
+     */
+    pub unsafe fn get_function_ptr(&self, name: &str) -> *mut u8 {
+        self.elf.program_section.offset(
+            self.elf.function_pointers[self
+                .elf
+                .function_names
+                .iter()
+                .position(|s| s == name)
+                .unwrap()],
+        )
     }
 }
 
@@ -29,8 +36,12 @@ pub fn load_binary(path: &Path) -> Module {
     let mut f = File::open(path).unwrap();
     let mut buffer = vec![];
     f.read_to_end(&mut buffer).unwrap();
-    let elf = unsafe { parse_elf(buffer.as_slice()) };
-    Module { elf }
+    let manifest_and_elf_bytes: (ModuleManifest, Vec<u8>) = postcard::from_bytes(&buffer).unwrap();
+    let elf = unsafe { parse_elf(&manifest_and_elf_bytes.1) };
+    Module {
+        manifest: manifest_and_elf_bytes.0,
+        elf,
+    }
 }
 
 /*
diff --git a/hercules_rt/src/manifest.rs b/hercules_rt/src/manifest.rs
new file mode 100644
index 00000000..c184e8b1
--- /dev/null
+++ b/hercules_rt/src/manifest.rs
@@ -0,0 +1,97 @@
+extern crate hercules_ir;
+extern crate serde;
+
+use self::serde::Deserialize;
+use self::serde::Serialize;
+
+use self::hercules_ir::ir::*;
+
+/*
+ * Every .hbin file contains a manifest which describes the Hercules functions
+ * contained in the module. This information is used by the runtime to execute
+ * the functions properly, the chief concern being how to stitch together the
+ * execution of each partition.
+ */
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ModuleManifest {
+    // A module contains a manifest per individual function.
+    pub functions: Vec<FunctionManifest>,
+    // All of the types used in the module.
+    pub types: Vec<Type>,
+    // The only constants that aren't baked into the generated code are array
+    // constants. These are explicitly stored in and loaded from the manifest.
+    // Arrays are composed of the underlying array bytes. We don't need to store
+    // the dimensions of arrays at this point, since the runtime doesn't
+    // manipulate or otherwise need the dimensions of constant arrays.
+    pub array_constants: Vec<Vec<u8>>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct FunctionManifest {
+    pub name: String,
+    // Types of the function parameters.
+    pub param_types: Vec<TypeID>,
+    // Types of all of the nodes in this function. Used for figuring out the
+    // type of partition data inputs and outputs.
+    pub typing: Vec<TypeID>,
+    // Number of dynamic constant parameters that need to be provided.
+    pub num_dynamic_constant_parameters: u32,
+    // Manifests for constituent partitions.
+    pub partitions: Vec<PartitionManifest>,
+    // When using dynamic constants, certain constraints are generated. For
+    // example, using a dynamic constant in a fork means that it must be non-
+    // zero, since fork-join nests are guaranteed to execute at least one
+    // iteration. Also, if one uses division in dynamic constant math, the
+    // resulting dynamic constant must be an integer, so the numerator dynamic
+    // constant must be divisible by the denominator dynamic constant. These are
+    // stored per function, since different functions have different constraints
+    // on their dynamic constant parameters.
+    pub dynamic_constant_rules: Vec<DynamicConstantRule>,
+}
+
+/*
+ * Rules for validity of provided dynamic constants. Integers refer to dynamic
+ * constant parameters of a function.
+ */
+#[derive(Debug, Serialize, Deserialize)]
+pub enum DynamicConstantRule {
+    // Generated from forks.
+    NonZero(u32),
+    // Generated from subtraction.
+    LessThan(u32, u32),
+    // Generated from division.
+    Divides(u32, u32),
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub enum PartitionInput {
+    // Data input from another partition within this function. Integer is the
+    // node ID used from the other partition.
+    DataInput(u32),
+    // An argument from the function parameters. Integer is the parameter index.
+    FunctionArgument(u32),
+    // An array constant used in this function. Integer is the array constant
+    // number.
+    ArrayConstant(u32),
+    // A dynamic constant parameter of this function. Integer is the dynamic
+    // constant parameter number.
+    DynamicConstant(u32),
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub enum PartitionOutput {
+    // Data output used by another partition within this function, or to be
+    // returned from this function. Integer is the node ID used in the other
+    // partition or by a return node.
+    DataOutput(u32),
+    // Value indicating control flow that the runtime should take.
+    ControlIndicator,
+}
+
+#[derive(Debug, Serialize, Deserialize, Default)]
+pub struct PartitionManifest {
+    // Top node for this partition, as an integer.
+    pub top_node: u32,
+    pub inputs: Vec<PartitionInput>,
+    pub outputs: Vec<PartitionOutput>,
+}
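
The dynamic constant rules are not populated yet (see the TODO in top.rs), but
the comments above pin down their intent. Below is a hypothetical validator the
runtime could run before launching a function; the helper and the tuple-position
conventions for LessThan and Divides are assumptions of this sketch, not part
of the patch:

    extern crate hercules_rt;

    use hercules_rt::manifest::DynamicConstantRule;

    // Hypothetical check of caller-supplied dynamic constant parameters
    // against a function's rules.
    fn dynamic_constants_valid(params: &[u64], rules: &[DynamicConstantRule]) -> bool {
        rules.iter().all(|rule| match rule {
            // Fork trip counts must be non-zero.
            DynamicConstantRule::NonZero(a) => params[*a as usize] != 0,
            // Subtraction must not underflow (argument order assumed: a < b).
            DynamicConstantRule::LessThan(a, b) => params[*a as usize] < params[*b as usize],
            // Division must be exact and non-zero (order assumed: d divides n).
            DynamicConstantRule::Divides(d, n) => {
                params[*d as usize] != 0 && params[*n as usize] % params[*d as usize] == 0
            }
        })
    }
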
diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs
index 2a28cd80..36177bea 100644
--- a/hercules_samples/matmul/src/main.rs
+++ b/hercules_samples/matmul/src/main.rs
@@ -3,17 +3,19 @@ extern crate clap;
 use std::path::Path;
 
 fn main() {
-    let module = hercules_rt::load_binary(Path::new("test.o"));
+    let module = hercules_rt::load_binary(Path::new("matmul.hbin"));
+
+    println!("{:?}", module);
 
     let matmul = hercules_rt::lookup_function!(
         module,
-        "matmul",
+        "matmul_part_1",
         *const f32,
         *const f32,
+        *mut f32,
         u64,
         u64,
         u64,
-        *mut f32,
         => *const f32
     );
 
@@ -24,10 +26,10 @@ fn main() {
         matmul(
             std::mem::transmute(a.as_ptr()),
             std::mem::transmute(b.as_ptr()),
+            std::mem::transmute(c.as_mut_ptr()),
             2,
             2,
             2,
-            std::mem::transmute(c.as_mut_ptr()),
         )
     };
     println!("{} {}\n{} {}", c[0][0], c[0][1], c[1][0], c[1][1]);
diff --git a/hercules_samples/sum_sample.hir b/hercules_samples/sum_sample.hir
index 8b8c0024..2ff76749 100644
--- a/hercules_samples/sum_sample.hir
+++ b/hercules_samples/sum_sample.hir
@@ -1,8 +1,8 @@
-fn sum(a: array(f32, 16)) -> f32
+fn sum<1>(a: array(f32, #0)) -> f32
   zero_idx = constant(u64, 0)
   one_idx = constant(u64, 1)
   zero_inc = constant(f32, 0)
-  bound = constant(u64, 16)
+  bound = dynamic_constant(#0)
   loop = region(start, if_true)
   idx = phi(loop, zero_idx, idx_inc)
   idx_inc = add(idx, one_idx)
@@ -39,4 +39,4 @@ fn alt_sum<1>(a: array(f32, #0)) -> f32
   if = if(negate_bottom, in_bounds)
   if_false = read(if, control(0))
   if_true = read(if, control(1))
-  r = return(if_false, red_add)
\ No newline at end of file
+  r = return(if_false, red_add)
diff --git a/hercules_samples/task_parallel.hir b/hercules_samples/task_parallel.hir
new file mode 100644
index 00000000..6386d5ec
--- /dev/null
+++ b/hercules_samples/task_parallel.hir
@@ -0,0 +1,14 @@
+fn task_parallel<1>() -> u64
+  f_ctrl1 = fork(start, #0)
+  j_ctrl1 = join(f_ctrl1)
+  zero = constant(u64, 0)
+  x1 = thread_id(f_ctrl1)
+  data1 = reduce(j_ctrl1, zero, sum1)
+  sum1 = add(data1, x1)
+  f_ctrl2 = fork(j_ctrl1, #0)
+  j_ctrl2 = join(f_ctrl2)
+  x2 = thread_id(f_ctrl2)
+  data2 = reduce(j_ctrl2, zero, sum2)
+  sum2 = add(data2, x2)
+  final = add(data1, data2)
+  r = return(j_ctrl2, final)
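
For reference, this sample runs two back-to-back fork-joins that each reduce
the sum of their thread IDs 0 through #0 - 1, then adds the two results, so
task_parallel returns #0 * (#0 - 1); with #0 = 4, each reduce yields
0 + 1 + 2 + 3 = 6 and the function returns 12. It also gives the revised
partition_out_forks condition in schedule.rs two sequential fork partitions to
separate.
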
diff --git a/hercules_tools/hercules_cpu/Cargo.toml b/hercules_tools/hercules_cpu_beta/Cargo.toml
similarity index 91%
rename from hercules_tools/hercules_cpu/Cargo.toml
rename to hercules_tools/hercules_cpu_beta/Cargo.toml
index 58519ce5..38b30e82 100644
--- a/hercules_tools/hercules_cpu/Cargo.toml
+++ b/hercules_tools/hercules_cpu_beta/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "hercules_cpu"
+name = "hercules_cpu_beta"
 version = "0.1.0"
 authors = ["Russel Arbore <rarbore2@illinois.edu>"]
 
diff --git a/hercules_tools/hercules_cpu/src/main.rs b/hercules_tools/hercules_cpu_beta/src/main.rs
similarity index 94%
rename from hercules_tools/hercules_cpu/src/main.rs
rename to hercules_tools/hercules_cpu_beta/src/main.rs
index c1b66ede..2edd426a 100644
--- a/hercules_tools/hercules_cpu/src/main.rs
+++ b/hercules_tools/hercules_cpu_beta/src/main.rs
@@ -17,7 +17,7 @@ struct Args {
 fn main() {
     let args = Args::parse();
     if !args.hir_file.ends_with(".hir") {
-        eprintln!("WARNING: Running hercules_cpu on a file without a .hir extension - interpreting as a textual Hercules IR file.");
+        eprintln!("WARNING: Running hercules_cpu_beta on a file without a .hir extension - interpreting as a textual Hercules IR file.");
     }
 
     let mut file = File::open(args.hir_file).expect("PANIC: Unable to open input file.");
diff --git a/hercules_tools/hercules_driver/Cargo.toml b/hercules_tools/hercules_driver/Cargo.toml
new file mode 100644
index 00000000..aa6d4f5e
--- /dev/null
+++ b/hercules_tools/hercules_driver/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "hercules_driver"
+version = "0.1.0"
+authors = ["Russel Arbore <rarbore2@illinois.edu>"]
+
+[dependencies]
+clap = { version = "*", features = ["derive"] }
+ron = "*"
+hercules_ir = { path = "../../hercules_ir" }
+hercules_opt = { path = "../../hercules_opt" }
diff --git a/hercules_tools/hercules_driver/src/main.rs b/hercules_tools/hercules_driver/src/main.rs
new file mode 100644
index 00000000..17be3596
--- /dev/null
+++ b/hercules_tools/hercules_driver/src/main.rs
@@ -0,0 +1,44 @@
+extern crate clap;
+
+use std::fs::File;
+use std::io::prelude::*;
+
+use clap::Parser;
+
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    hir_file: String,
+    passes: String,
+}
+
+fn main() {
+    let args = Args::parse();
+    if !args.hir_file.ends_with(".hir") {
+        eprintln!("WARNING: Running hercules_driver on a file without a .hir extension - interpreting as a textual Hercules IR file.");
+    }
+
+    let mut file = File::open(args.hir_file).expect("PANIC: Unable to open input file.");
+    let mut contents = String::new();
+    file.read_to_string(&mut contents)
+        .expect("PANIC: Unable to read input file contents.");
+    let module =
+        hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file.");
+
+    let mut pm = hercules_opt::pass::PassManager::new(module);
+    let passes: Vec<hercules_opt::pass::Pass> = args
+        .passes
+        .split(char::is_whitespace)
+        .map(|pass_str| {
+            assert_ne!(
+                pass_str, "",
+                "PANIC: Can't interpret empty pass name. Try giving a list of pass names."
+            );
+            ron::from_str(pass_str).expect("PANIC: Couldn't parse list of passes.")
+        })
+        .collect();
+    for pass in passes {
+        pm.add_pass(pass);
+    }
+    pm.run_passes();
+}
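
Because Pass derives Deserialize, the pass list handed to the driver is just
whitespace-separated RON values: unit variants are bare names and tuple
variants carry their argument inline, which is also why an individual pass
string may not contain spaces. A small sketch of the parsing the driver relies
on; the output file name is an arbitrary example:

    extern crate hercules_opt;
    extern crate ron;

    use hercules_opt::pass::Pass;

    // Sketch: each whitespace-separated token parses as one RON value of the
    // Pass enum, mirroring the loop in main above.
    fn parse_example() -> Vec<Pass> {
        let passes = r#"Verify Codegen("task_parallel.hbin")"#;
        passes
            .split(char::is_whitespace)
            .map(|s| ron::from_str(s).expect("PANIC: Couldn't parse pass."))
            .collect()
    }
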
-- 
GitLab