From d7498a401ce04072c17e01b7000fe1578e0e54d5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 11 Sep 2023 10:58:23 -0500 Subject: [PATCH 001/105] parse rest of ir nodes --- hercules_ir/src/parse.rs | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 76956e84..32423412 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -207,9 +207,13 @@ fn parse_node<'a>( "if" => parse_if(ir_text, context)?, "fork" => parse_fork(ir_text, context)?, "join" => parse_join(ir_text, context)?, + "phi" => parse_phi(ir_text, context)?, "return" => parse_return(ir_text, context)?, "constant" => parse_constant_node(ir_text, context)?, "add" => parse_add(ir_text, context)?, + "sub" => parse_sub(ir_text, context)?, + "mul" => parse_mul(ir_text, context)?, + "div" => parse_div(ir_text, context)?, "call" => parse_call(ir_text, context)?, _ => Err(nom::Err::Error(nom::error::Error { input: ir_text, @@ -260,6 +264,26 @@ fn parse_join<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes Ok((ir_text, Node::Join { control, factor })) } +fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (control, data)) = parse_tuple2( + parse_identifier, + nom::multi::separated_list1( + nom::sequence::tuple(( + nom::character::complete::multispace0, + nom::character::complete::char(','), + nom::character::complete::multispace0, + )), + parse_identifier, + ), + )(ir_text)?; + let control = context.borrow_mut().get_node_id(control); + let data = data + .into_iter() + .map(|x| context.borrow_mut().get_node_id(x)) + .collect(); + Ok((ir_text, Node::Phi { control, data })) +} + fn parse_return<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, @@ -294,6 +318,27 @@ fn parse_add<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResu Ok((ir_text, Node::Add { left, right })) } +fn 
parse_sub<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::Sub { left, right })) +} + +fn parse_mul<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::Mul { left, right })) +} + +fn parse_div<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::Div { left, right })) +} + fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { let ir_text = nom::character::complete::multispace0(ir_text)?.0; let parse_dynamic_constants = -- GitLab From 0b97b8c571316fed1af2b5e8d63a0435557c3ec3 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 11 Sep 2023 11:12:18 -0500 Subject: [PATCH 002/105] IR is pure for now --- DESIGN.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESIGN.md b/DESIGN.md index 6ab08b1b..9d5d7ddd 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -32,6 +32,8 @@ The IR of the Hercules compiler is similar to the sea of nodes IR presented in " A key design consideration of Hercules IR is the absence of a concept of memory. A downside of this approach is that any language targetting Hecules IR must also be very restrictive regarding memory - in practice, this means tightly controlling or eliminating first-class references. 
The upside is that the compiler has complete freedom to layout data however it likes in memory when performing code generation. This includes deciding which data resides in which address spaces, which is a necessary ability for a compiler striving to have fine-grained control over what operations are computed on what devices. +In addition to not having a generalized memory, Hercules IR has no functionality for calling functions with side-effects, or doing IO. In other words, Hercules is a pure IR (it's not functional, as functions aren't first class values). This may be changed in the future - we could support effectful programs by giving call operators a control input and output edge. However, at least for now, we need to work with the simplest IR possible. + ### Optimizations TODO: @rarbore2 -- GitLab From 8a6a36f1486ca1b9100aba90a60f7bf23bc1a3b7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 10:26:13 -0500 Subject: [PATCH 003/105] Parse + defs for accessing data in prod/sum/array types --- hercules_ir/src/dot.rs | 18 +++++++++ hercules_ir/src/ir.rs | 27 ++++++++++++++ hercules_ir/src/parse.rs | 80 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index b953c219..77e9c50e 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -147,5 +147,23 @@ fn get_string_node_kind(node: &Node) -> &'static str { dynamic_constants: _, args: _, } => "call", + Node::ReadProd { prod: _, index: _ } => "read_prod", + Node::WriteProd { + prod: _, + data: _, + index: _, + } => "write_prod ", + Node::ReadArray { array: _, index: _ } => "read_array", + Node::WriteArray { + array: _, + data: _, + index: _, + } => "write_array", + Node::Match { control: _, sum: _ } => "match", + Node::BuildSum { + data: _, + sum_ty: _, + variant: _, + } => "build_sum", } } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index fbb9db03..ec27f03b 100644 
--- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -114,6 +114,33 @@ pub enum Node { dynamic_constants: Box<[DynamicConstantID]>, args: Box<[NodeID]>, }, + ReadProd { + prod: NodeID, + index: usize, + }, + WriteProd { + prod: NodeID, + data: NodeID, + index: usize, + }, + ReadArray { + array: NodeID, + index: NodeID, + }, + WriteArray { + array: NodeID, + data: NodeID, + index: NodeID, + }, + Match { + control: NodeID, + sum: NodeID, + }, + BuildSum { + data: NodeID, + sum_ty: TypeID, + variant: usize, + }, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 32423412..1bf180ea 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -215,6 +215,12 @@ fn parse_node<'a>( "mul" => parse_mul(ir_text, context)?, "div" => parse_div(ir_text, context)?, "call" => parse_call(ir_text, context)?, + "read_prod" => parse_read_prod(ir_text, context)?, + "write_prod" => parse_write_prod(ir_text, context)?, + "read_array" => parse_read_array(ir_text, context)?, + "write_array" => parse_write_array(ir_text, context)?, + "match" => parse_match(ir_text, context)?, + "build_sum" => parse_build_sum(ir_text, context)?, _ => Err(nom::Err::Error(nom::error::Error { input: ir_text, code: nom::error::ErrorKind::IsNot, @@ -387,6 +393,50 @@ fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes )) } +fn parse_read_prod<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (prod, index)) = + parse_tuple2(parse_identifier, |x| parse_prim::<usize>(x, "1234567890"))(ir_text)?; + let prod = context.borrow_mut().get_node_id(prod); + Ok((ir_text, Node::ReadProd { prod, index })) +} + +fn parse_write_prod<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (prod, data, index)) = parse_tuple3(parse_identifier, parse_identifier, |x| { + parse_prim::<usize>(x, 
"1234567890") + })(ir_text)?; + let prod = context.borrow_mut().get_node_id(prod); + let data = context.borrow_mut().get_node_id(data); + Ok((ir_text, Node::WriteProd { prod, data, index })) +} + +fn parse_read_array<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (array, index)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let array = context.borrow_mut().get_node_id(array); + let index = context.borrow_mut().get_node_id(index); + Ok((ir_text, Node::ReadArray { array, index })) +} + +fn parse_write_array<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (array, data, index)) = + parse_tuple3(parse_identifier, parse_identifier, parse_identifier)(ir_text)?; + let array = context.borrow_mut().get_node_id(array); + let data = context.borrow_mut().get_node_id(data); + let index = context.borrow_mut().get_node_id(index); + Ok((ir_text, Node::WriteArray { array, data, index })) +} + fn parse_type_id<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, @@ -397,6 +447,36 @@ fn parse_type_id<'a>( Ok((ir_text, id)) } +fn parse_match<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (control, sum)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let control = context.borrow_mut().get_node_id(control); + let sum = context.borrow_mut().get_node_id(sum); + Ok((ir_text, Node::Match { control, sum })) +} + +fn parse_build_sum<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (data, sum_ty, variant)) = parse_tuple3( + parse_identifier, + |x| parse_type_id(x, context), + |x| parse_prim::<usize>(x, "1234567890"), + )(ir_text)?; + let data = context.borrow_mut().get_node_id(data); + Ok(( + ir_text, + Node::BuildSum { + data, + sum_ty, + variant, + }, + )) +} + fn parse_type<'a>(ir_text: &'a str, context: 
&RefCell<Context<'a>>) -> nom::IResult<&'a str, Type> { let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, ty) = nom::branch::alt(( -- GitLab From 66f7352658221384e8e0805e18fbf98e29b173a8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 10:29:22 -0500 Subject: [PATCH 004/105] Fix read/write array nodes --- hercules_ir/src/ir.rs | 4 ++-- hercules_ir/src/parse.rs | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index ec27f03b..9234371f 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -125,12 +125,12 @@ pub enum Node { }, ReadArray { array: NodeID, - index: NodeID, + index: Box<[NodeID]>, }, WriteArray { array: NodeID, data: NodeID, - index: NodeID, + index: Box<[NodeID]>, }, Match { control: NodeID, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 1bf180ea..c174c0b9 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -419,9 +419,22 @@ fn parse_read_array<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { - let (ir_text, (array, index)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let (ir_text, (array, index)) = parse_tuple2( + parse_identifier, + nom::multi::separated_list1( + nom::sequence::tuple(( + nom::character::complete::multispace0, + nom::character::complete::char(','), + nom::character::complete::multispace0, + )), + parse_identifier, + ), + )(ir_text)?; let array = context.borrow_mut().get_node_id(array); - let index = context.borrow_mut().get_node_id(index); + let index = index + .into_iter() + .map(|x| context.borrow_mut().get_node_id(x)) + .collect(); Ok((ir_text, Node::ReadArray { array, index })) } @@ -429,11 +442,24 @@ fn parse_write_array<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { - let (ir_text, (array, data, index)) = 
- parse_tuple3(parse_identifier, parse_identifier, parse_identifier)(ir_text)?; + let (ir_text, (array, data, index)) = parse_tuple3( + parse_identifier, + parse_identifier, + nom::multi::separated_list1( + nom::sequence::tuple(( + nom::character::complete::multispace0, + nom::character::complete::char(','), + nom::character::complete::multispace0, + )), + parse_identifier, + ), + )(ir_text)?; let array = context.borrow_mut().get_node_id(array); let data = context.borrow_mut().get_node_id(data); - let index = context.borrow_mut().get_node_id(index); + let index = index + .into_iter() + .map(|x| context.borrow_mut().get_node_id(x)) + .collect(); Ok((ir_text, Node::WriteArray { array, data, index })) } -- GitLab From af1af4d5a1bffeb8e1d08dd0ac72df8f12a80d4e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 10:42:56 -0500 Subject: [PATCH 005/105] dynamic_constant node, add DC info to dot graphs --- hercules_ir/src/dot.rs | 26 +++++++++++++++++++------- hercules_ir/src/parse.rs | 9 +++++++++ samples/simple1.hir | 4 ++-- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 77e9c50e..19f7c17b 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -19,7 +19,15 @@ fn write_function<W: std::fmt::Write>( w: &mut W, ) -> std::fmt::Result { write!(w, "subgraph {} {{\n", module.functions[i].name)?; - write!(w, "label=\"{}\"\n", module.functions[i].name)?; + if module.functions[i].num_dynamic_constants > 0 { + write!( + w, + "label=\"{}<{}>\"\n", + module.functions[i].name, module.functions[i].num_dynamic_constants + )?; + } else { + write!(w, "label=\"{}\"\n", module.functions[i].name)?; + } write!(w, "bgcolor=ivory4\n")?; write!(w, "cluster=true\n")?; let mut visited = HashMap::default(); @@ -90,12 +98,16 @@ fn write_node<W: std::fmt::Write>( visited = tmp_visited; write!(w, "{} -> {};\n", arg_name, name)?; } - write!( - w, - "{} [label=\"call({})\"];\n", - 
name, - module.functions[function.idx()].name - )?; + write!(w, "{} [label=\"call<", name,)?; + for (idx, id) in dynamic_constants.iter().enumerate() { + let dc = &module.dynamic_constants[id.idx()]; + if idx == 0 { + write!(w, "{:?}", dc)?; + } else { + write!(w, ", {:?}", dc)?; + } + } + write!(w, ">({})\"];\n", module.functions[function.idx()].name)?; write!( w, "{} -> start_{}_0 [lhead={}];\n", diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index c174c0b9..0d0bb81e 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -210,6 +210,7 @@ fn parse_node<'a>( "phi" => parse_phi(ir_text, context)?, "return" => parse_return(ir_text, context)?, "constant" => parse_constant_node(ir_text, context)?, + "dynamic_constant" => parse_dynamic_constant_node(ir_text, context)?, "add" => parse_add(ir_text, context)?, "sub" => parse_sub(ir_text, context)?, "mul" => parse_mul(ir_text, context)?, @@ -317,6 +318,14 @@ fn parse_constant_node<'a>( Ok((ir_text, Node::Constant { id })) } +fn parse_dynamic_constant_node<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (id,)) = parse_tuple1(|x| parse_dynamic_constant_id(x, context))(ir_text)?; + Ok((ir_text, Node::DynamicConstant { id })) +} + fn parse_add<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let left = context.borrow_mut().get_node_id(left); diff --git a/samples/simple1.hir b/samples/simple1.hir index acfc6416..7e0b1d54 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -1,8 +1,8 @@ fn myfunc(x: i32) -> i32 - y = call(add, x, x) + y = call<5>(add, x, x) r = return(start, y) -fn add(x: i32, y: i32) -> i32 +fn add<1>(x: i32, y: i32) -> i32 c = constant(i8, 5) r = return(start, w) w = add(z, c) -- GitLab From 5887ab9a3e2f1dc401580d06fc9983f98e870134 Mon Sep 17 00:00:00 2001 From: Russel Arbore 
<russel.jma@gmail.com> Date: Tue, 12 Sep 2023 11:40:03 -0500 Subject: [PATCH 006/105] Add many nodes to dot output --- hercules_ir/src/dot.rs | 132 ++++++++++++++++++++++++++++++++------- hercules_ir/src/ir.rs | 1 + hercules_ir/src/parse.rs | 16 ++++- 3 files changed, 122 insertions(+), 27 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 19f7c17b..47e4e6c5 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -6,18 +6,13 @@ pub fn write_dot<W: std::fmt::Write>(module: &Module, w: &mut W) -> std::fmt::Re write!(w, "digraph \"Module\" {{\n")?; write!(w, "compound=true\n")?; for i in 0..module.functions.len() { - write_function(i, module, &module.constants, w)?; + write_function(i, module, w)?; } write!(w, "}}\n")?; Ok(()) } -fn write_function<W: std::fmt::Write>( - i: usize, - module: &Module, - constants: &Vec<Constant>, - w: &mut W, -) -> std::fmt::Result { +fn write_function<W: std::fmt::Write>(i: usize, module: &Module, w: &mut W) -> std::fmt::Result { write!(w, "subgraph {} {{\n", module.functions[i].name)?; if module.functions[i].num_dynamic_constants > 0 { write!( @@ -33,7 +28,7 @@ fn write_function<W: std::fmt::Write>( let mut visited = HashMap::default(); let function = &module.functions[i]; for j in 0..function.nodes.len() { - visited = write_node(i, j, module, constants, visited, w)?.1; + visited = write_node(i, j, module, visited, w)?.1; } write!(w, "}}\n")?; Ok(()) @@ -43,7 +38,6 @@ fn write_node<W: std::fmt::Write>( i: usize, j: usize, module: &Module, - constants: &Vec<Constant>, mut visited: HashMap<NodeID, String>, w: &mut W, ) -> Result<(String, HashMap<NodeID, String>), std::fmt::Error> { @@ -59,11 +53,65 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} [label=\"start\"];\n", name)?; visited } + Node::Region { preds } => { + write!(w, "{} [label=\"region\"];\n", name)?; + for pred in preds.iter() { + let (pred_name, tmp_visited) = write_node(i, pred.idx(), module, visited, w)?; + visited = 
tmp_visited; + write!(w, "{} -> {};\n", pred_name, name)?; + } + visited + } + Node::If { control, cond } => { + write!(w, "{} [label=\"if\"];\n", name)?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + let (cond_name, visited) = write_node(i, cond.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", control_name, name)?; + write!(w, "{} -> {};\n", cond_name, name)?; + visited + } + Node::Fork { control, factor } => { + write!( + w, + "{} [label=\"fork<{:?}>\"];\n", + name, + module.dynamic_constants[factor.idx()] + )?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", control_name, name)?; + visited + } + Node::Join { + control, + data, + factor, + } => { + write!( + w, + "{} [label=\"join<{:?}>\"];\n", + name, + module.dynamic_constants[factor.idx()] + )?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", control_name, name)?; + write!(w, "{} -> {};\n", data_name, name)?; + visited + } + Node::Phi { control, data } => { + write!(w, "{} [label=\"phi\"];\n", name)?; + let (control_name, mut visited) = write_node(i, control.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", control_name, name)?; + for data in data.iter() { + let (data_name, tmp_visited) = write_node(i, data.idx(), module, visited, w)?; + visited = tmp_visited; + write!(w, "{} -> {};\n", data_name, name)?; + } + visited + } Node::Return { control, value } => { - let (control_name, visited) = - write_node(i, control.idx(), module, constants, visited, w)?; - let (value_name, visited) = - write_node(i, value.idx(), module, constants, visited, w)?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + let (value_name, visited) = write_node(i, value.idx(), module, visited, w)?; write!(w, "{} [label=\"return\"];\n", name)?; write!(w, "{} 
-> {} [style=\"dashed\"];\n", control_name, name)?; write!(w, "{} -> {};\n", value_name, name)?; @@ -74,15 +122,51 @@ fn write_node<W: std::fmt::Write>( visited } Node::Constant { id } => { - write!(w, "{} [label=\"{:?}\"];\n", name, constants[id.idx()])?; + write!( + w, + "{} [label=\"{:?}\"];\n", + name, + module.constants[id.idx()] + )?; + visited + } + Node::DynamicConstant { id } => { + write!( + w, + "{} [label=\"{:?}\"];\n", + name, + module.dynamic_constants[id.idx()] + )?; visited } Node::Add { left, right } => { - let (left_name, visited) = - write_node(i, left.idx(), module, constants, visited, w)?; - let (right_name, visited) = - write_node(i, right.idx(), module, constants, visited, w)?; write!(w, "{} [label=\"add\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", left_name, name)?; + write!(w, "{} -> {};\n", right_name, name)?; + visited + } + Node::Sub { left, right } => { + write!(w, "{} [label=\"sub\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", left_name, name)?; + write!(w, "{} -> {};\n", right_name, name)?; + visited + } + Node::Mul { left, right } => { + write!(w, "{} [label=\"mul\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", left_name, name)?; + write!(w, "{} -> {};\n", right_name, name)?; + visited + } + Node::Div { left, right } => { + write!(w, "{} [label=\"div\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; write!(w, "{} -> {};\n", left_name, name)?; write!(w, "{} -> 
{};\n", right_name, name)?; visited @@ -92,12 +176,6 @@ fn write_node<W: std::fmt::Write>( dynamic_constants, args, } => { - for arg in args.iter() { - let (arg_name, tmp_visited) = - write_node(i, arg.idx(), module, constants, visited, w)?; - visited = tmp_visited; - write!(w, "{} -> {};\n", arg_name, name)?; - } write!(w, "{} [label=\"call<", name,)?; for (idx, id) in dynamic_constants.iter().enumerate() { let dc = &module.dynamic_constants[id.idx()]; @@ -108,6 +186,11 @@ fn write_node<W: std::fmt::Write>( } } write!(w, ">({})\"];\n", module.functions[function.idx()].name)?; + for arg in args.iter() { + let (arg_name, tmp_visited) = write_node(i, arg.idx(), module, visited, w)?; + visited = tmp_visited; + write!(w, "{} -> {};\n", arg_name, name)?; + } write!( w, "{} -> start_{}_0 [lhead={}];\n", @@ -137,6 +220,7 @@ fn get_string_node_kind(node: &Node) -> &'static str { } => "fork", Node::Join { control: _, + data: _, factor: _, } => "join", Node::Phi { diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 9234371f..392cd460 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -74,6 +74,7 @@ pub enum Node { }, Join { control: NodeID, + data: NodeID, factor: DynamicConstantID, }, Phi { diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 0d0bb81e..1363e016 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -265,10 +265,20 @@ fn parse_fork<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes } fn parse_join<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (control, factor)) = - parse_tuple2(parse_identifier, |x| parse_dynamic_constant_id(x, context))(ir_text)?; + let (ir_text, (control, data, factor)) = + parse_tuple3(parse_identifier, parse_identifier, |x| { + parse_dynamic_constant_id(x, context) + })(ir_text)?; let control = context.borrow_mut().get_node_id(control); - Ok((ir_text, Node::Join { control, factor })) + let data = 
context.borrow_mut().get_node_id(data); + Ok(( + ir_text, + Node::Join { + control, + data, + factor, + }, + )) } fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { -- GitLab From cc14d5d9feb9a0e0ca84338e663addc383a2b248 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 13:18:20 -0500 Subject: [PATCH 007/105] Add remaining dot outputs --- hercules_ir/src/dot.rs | 63 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 47e4e6c5..86b25991 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -200,7 +200,68 @@ fn write_node<W: std::fmt::Write>( )?; visited } - _ => todo!(), + Node::ReadProd { prod, index } => { + write!(w, "{} [label=\"read_prod({})\"];\n", name, index)?; + let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", prod_name, name)?; + visited + } + Node::WriteProd { prod, data, index } => { + write!(w, "{} [label=\"write_prod({})\"];\n", name, index)?; + let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", prod_name, name)?; + write!(w, "{} -> {};\n", data_name, name)?; + visited + } + Node::ReadArray { array, index } => { + write!(w, "{} [label=\"read_array\"];\n", name)?; + let (array_name, mut visited) = write_node(i, array.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", array_name, name)?; + for index in index.iter() { + let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; + visited = tmp_visited; + write!(w, "{} -> {};\n", index_name, name)?; + } + visited + } + Node::WriteArray { array, data, index } => { + write!(w, "{} [label=\"write_array\"];\n", name)?; + let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; + 
write!(w, "{} -> {};\n", array_name, name)?; + let (data_name, mut visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", data_name, name)?; + for index in index.iter() { + let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; + visited = tmp_visited; + write!(w, "{} -> {};\n", index_name, name)?; + } + visited + } + Node::Match { control, sum } => { + write!(w, "{} [label=\"match\"];\n", name)?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", control_name, name)?; + let (sum_name, visited) = write_node(i, sum.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", sum_name, name)?; + visited + } + Node::BuildSum { + data, + sum_ty, + variant, + } => { + write!( + w, + "{} [label=\"build_sum({:?}, {})\"];\n", + name, + module.types[sum_ty.idx()], + variant + )?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} -> {};\n", data_name, name)?; + visited + } }; Ok((visited.get(&id).unwrap().clone(), visited)) } -- GitLab From cf034332786f1d4bfdc212b4f86e20dc8273255a Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 13:29:59 -0500 Subject: [PATCH 008/105] Add labels to edges in dot graph --- hercules_ir/src/dot.rs | 78 ++++++++++++++++++++++-------------------- samples/simple1.hir | 4 ++- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 86b25991..faf80353 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -55,10 +55,10 @@ fn write_node<W: std::fmt::Write>( } Node::Region { preds } => { write!(w, "{} [label=\"region\"];\n", name)?; - for pred in preds.iter() { + for (idx, pred) in preds.iter().enumerate() { let (pred_name, tmp_visited) = write_node(i, pred.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {};\n", pred_name, name)?; + write!(w, "{} -> {} 
[label=\"pred {}\"];\n", pred_name, name, idx)?; } visited } @@ -66,8 +66,8 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} [label=\"if\"];\n", name)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (cond_name, visited) = write_node(i, cond.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", control_name, name)?; - write!(w, "{} -> {};\n", cond_name, name)?; + write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!(w, "{} -> {} [label=\"cond\"];\n", cond_name, name)?; visited } Node::Fork { control, factor } => { @@ -78,7 +78,7 @@ fn write_node<W: std::fmt::Write>( module.dynamic_constants[factor.idx()] )?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", control_name, name)?; + write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; visited } Node::Join { @@ -94,18 +94,18 @@ fn write_node<W: std::fmt::Write>( )?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", control_name, name)?; - write!(w, "{} -> {};\n", data_name, name)?; + write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited } Node::Phi { control, data } => { write!(w, "{} [label=\"phi\"];\n", name)?; let (control_name, mut visited) = write_node(i, control.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", control_name, name)?; - for data in data.iter() { + write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + for (idx, data) in data.iter().enumerate() { let (data_name, tmp_visited) = write_node(i, data.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {};\n", data_name, name)?; + write!(w, "{} -> {} [label=\"data {}\"];\n", data_name, name, idx)?; } visited } @@ -113,8 +113,12 @@ fn write_node<W: std::fmt::Write>( let 
(control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (value_name, visited) = write_node(i, value.idx(), module, visited, w)?; write!(w, "{} [label=\"return\"];\n", name)?; - write!(w, "{} -> {} [style=\"dashed\"];\n", control_name, name)?; - write!(w, "{} -> {};\n", value_name, name)?; + write!( + w, + "{} -> {} [label = \"control\", style=\"dashed\"];\n", + control_name, name + )?; + write!(w, "{} -> {} [label=\"value\"];\n", value_name, name)?; visited } Node::Parameter { index } => { @@ -133,7 +137,7 @@ fn write_node<W: std::fmt::Write>( Node::DynamicConstant { id } => { write!( w, - "{} [label=\"{:?}\"];\n", + "{} [label=\"dynamic_constant({:?})\"];\n", name, module.dynamic_constants[id.idx()] )?; @@ -143,32 +147,32 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} [label=\"add\"];\n", name)?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", left_name, name)?; - write!(w, "{} -> {};\n", right_name, name)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } Node::Sub { left, right } => { write!(w, "{} [label=\"sub\"];\n", name)?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", left_name, name)?; - write!(w, "{} -> {};\n", right_name, name)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } Node::Mul { left, right } => { write!(w, "{} [label=\"mul\"];\n", name)?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", left_name, name)?; - write!(w, "{} -> {};\n", right_name, name)?; + 
write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } Node::Div { left, right } => { write!(w, "{} [label=\"div\"];\n", name)?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", left_name, name)?; - write!(w, "{} -> {};\n", right_name, name)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } Node::Call { @@ -186,14 +190,14 @@ fn write_node<W: std::fmt::Write>( } } write!(w, ">({})\"];\n", module.functions[function.idx()].name)?; - for arg in args.iter() { + for (idx, arg) in args.iter().enumerate() { let (arg_name, tmp_visited) = write_node(i, arg.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {};\n", arg_name, name)?; + write!(w, "{} -> {} [label=\"arg {}\"];\n", arg_name, name, idx)?; } write!( w, - "{} -> start_{}_0 [lhead={}];\n", + "{} -> start_{}_0 [label=\"call\", lhead={}];\n", name, function.idx(), module.functions[function.idx()].name @@ -203,47 +207,47 @@ fn write_node<W: std::fmt::Write>( Node::ReadProd { prod, index } => { write!(w, "{} [label=\"read_prod({})\"];\n", name, index)?; let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", prod_name, name)?; + write!(w, "{} -> {} [label=\"prod\"];\n", prod_name, name)?; visited } Node::WriteProd { prod, data, index } => { write!(w, "{} [label=\"write_prod({})\"];\n", name, index)?; let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", prod_name, name)?; - write!(w, "{} -> {};\n", data_name, name)?; + write!(w, "{} -> {} [label=\"prod\"];\n", prod_name, name)?; + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; 
visited } Node::ReadArray { array, index } => { write!(w, "{} [label=\"read_array\"];\n", name)?; let (array_name, mut visited) = write_node(i, array.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", array_name, name)?; - for index in index.iter() { + write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; + for (idx, index) in index.iter().enumerate() { let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {};\n", index_name, name)?; + write!(w, "{} -> {} [label=\"index {}\"];\n", index_name, name, idx)?; } visited } Node::WriteArray { array, data, index } => { write!(w, "{} [label=\"write_array\"];\n", name)?; let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", array_name, name)?; + write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; let (data_name, mut visited) = write_node(i, data.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", data_name, name)?; - for index in index.iter() { + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; + for (idx, index) in index.iter().enumerate() { let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {};\n", index_name, name)?; + write!(w, "{} -> {} [label=\"index {}\"];\n", index_name, name, idx)?; } visited } Node::Match { control, sum } => { write!(w, "{} [label=\"match\"];\n", name)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", control_name, name)?; + write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; let (sum_name, visited) = write_node(i, sum.idx(), module, visited, w)?; - write!(w, "{} -> {};\n", sum_name, name)?; + write!(w, "{} -> {} [label=\"sum\"];\n", sum_name, name)?; visited } Node::BuildSum { @@ -259,7 +263,7 @@ fn write_node<W: std::fmt::Write>( variant )?; let (data_name, visited) = write_node(i, data.idx(), 
module, visited, w)?; - write!(w, "{} -> {};\n", data_name, name)?; + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited } }; diff --git a/samples/simple1.hir b/samples/simple1.hir index 7e0b1d54..a70cadd1 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -4,7 +4,9 @@ fn myfunc(x: i32) -> i32 fn add<1>(x: i32, y: i32) -> i32 c = constant(i8, 5) - r = return(start, w) + dc = dynamic_constant(#0) + r = return(start, s) w = add(z, c) + s = add(w, dc) z = add(x, y) -- GitLab From 7210a7cf8f02ff2149890dbce1923b1c5cf5c3c3 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 13:54:43 -0500 Subject: [PATCH 009/105] Matmul example --- hercules_ir/src/dot.rs | 9 +++++++++ hercules_ir/src/ir.rs | 4 ++++ hercules_ir/src/parse.rs | 11 +++++++++++ samples/simple1.hir | 30 ++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index faf80353..7b4b8201 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -175,6 +175,14 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } + Node::LessThan { left, right } => { + write!(w, "{} [label=\"less_than\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; + visited + } Node::Call { function, dynamic_constants, @@ -303,6 +311,7 @@ fn get_string_node_kind(node: &Node) -> &'static str { Node::Sub { left: _, right: _ } => "sub", Node::Mul { left: _, right: _ } => "mul", Node::Div { left: _, right: _ } => "div", + Node::LessThan { left: _, right: _ } => "less_than", Node::Call { function: _, dynamic_constants: _, diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 392cd460..090e1561 
100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -110,6 +110,10 @@ pub enum Node { left: NodeID, right: NodeID, }, + LessThan { + left: NodeID, + right: NodeID, + }, Call { function: FunctionID, dynamic_constants: Box<[DynamicConstantID]>, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 1363e016..205b1c8a 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -215,6 +215,7 @@ fn parse_node<'a>( "sub" => parse_sub(ir_text, context)?, "mul" => parse_mul(ir_text, context)?, "div" => parse_div(ir_text, context)?, + "less_than" => parse_less_than(ir_text, context)?, "call" => parse_call(ir_text, context)?, "read_prod" => parse_read_prod(ir_text, context)?, "write_prod" => parse_write_prod(ir_text, context)?, @@ -364,6 +365,16 @@ fn parse_div<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResu Ok((ir_text, Node::Div { left, right })) } +fn parse_less_than<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::LessThan { left, right })) +} + fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { let ir_text = nom::character::complete::multispace0(ir_text)?.0; let parse_dynamic_constants = diff --git a/samples/simple1.hir b/samples/simple1.hir index a70cadd1..71c3e3bc 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -10,3 +10,33 @@ fn add<1>(x: i32, y: i32) -> i32 s = add(w, dc) z = add(x, y) +fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) + i = fork(start, #0) + i_ctrl = read_prod(i, 0) + i_idx = read_prod(i, 1) + k = fork(i_ctrl, #2) + k_ctrl = read_prod(k, 0) + k_idx = read_prod(k, 1) + zero_idx = constant(u64, 0) + one_idx = constant(u64, 1) + 
zero_val = constant(f32, 0) + loop = region(k_ctrl, if_true) + j = phi(loop, zero_idx, j_inc) + sum = phi(loop, zero_val, sum_inc) + j_inc = add(j, one_idx) + val1 = read_array(a, i_idx, j) + val2 = read_array(b, j, k_idx) + mul = mul(val1, val2) + sum_inc = add(sum, mul) + j_size = dynamic_constant(#1) + less = less_than(j_inc, j_size) + if = if(loop, less) + if_false = read_prod(if, 0) + if_true = read_prod(if, 1) + k_join = join(if_false, sum_inc, #2) + k_join_ctrl = read_prod(k_join, 0) + k_join_data = read_prod(k_join, 1) + i_join = join(k_join_ctrl, k_join_data, #0) + i_join_ctrl = read_prod(i_join, 0) + i_join_data = read_prod(i_join, 1) + r = return(i_join_ctrl, i_join_data) \ No newline at end of file -- GitLab From 7e09a2b2bd82deb876c730ef50fe8699cbc87aa6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 14:02:02 -0500 Subject: [PATCH 010/105] Dash all control edges --- hercules_ir/src/dot.rs | 32 ++++++++++++++++++++++++++------ samples/matmul.hir | 30 ++++++++++++++++++++++++++++++ samples/simple1.hir | 33 +-------------------------------- 3 files changed, 57 insertions(+), 38 deletions(-) create mode 100644 samples/matmul.hir diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 7b4b8201..f8ebd118 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -66,7 +66,11 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} [label=\"if\"];\n", name)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (cond_name, visited) = write_node(i, cond.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; write!(w, "{} -> {} [label=\"cond\"];\n", cond_name, name)?; visited } @@ -78,7 +82,11 @@ fn write_node<W: std::fmt::Write>( module.dynamic_constants[factor.idx()] )?; let (control_name, visited) = write_node(i, control.idx(), 
module, visited, w)?; - write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; visited } Node::Join { @@ -94,14 +102,22 @@ fn write_node<W: std::fmt::Write>( )?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited } Node::Phi { control, data } => { write!(w, "{} [label=\"phi\"];\n", name)?; let (control_name, mut visited) = write_node(i, control.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; for (idx, data) in data.iter().enumerate() { let (data_name, tmp_visited) = write_node(i, data.idx(), module, visited, w)?; visited = tmp_visited; @@ -115,7 +131,7 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} [label=\"return\"];\n", name)?; write!( w, - "{} -> {} [label = \"control\", style=\"dashed\"];\n", + "{} -> {} [label=\"control\", style=\"dashed\"];\n", control_name, name )?; write!(w, "{} -> {} [label=\"value\"];\n", value_name, name)?; @@ -253,7 +269,11 @@ fn write_node<W: std::fmt::Write>( Node::Match { control, sum } => { write!(w, "{} [label=\"match\"];\n", name)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"control\"];\n", control_name, name)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; let (sum_name, visited) = write_node(i, sum.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"sum\"];\n", sum_name, name)?; visited diff --git 
a/samples/matmul.hir b/samples/matmul.hir new file mode 100644 index 00000000..3a7f34ae --- /dev/null +++ b/samples/matmul.hir @@ -0,0 +1,30 @@ +fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) + i = fork(start, #0) + i_ctrl = read_prod(i, 0) + i_idx = read_prod(i, 1) + k = fork(i_ctrl, #2) + k_ctrl = read_prod(k, 0) + k_idx = read_prod(k, 1) + zero_idx = constant(u64, 0) + one_idx = constant(u64, 1) + zero_val = constant(f32, 0) + loop = region(k_ctrl, if_true) + j = phi(loop, zero_idx, j_inc) + sum = phi(loop, zero_val, sum_inc) + j_inc = add(j, one_idx) + val1 = read_array(a, i_idx, j) + val2 = read_array(b, j, k_idx) + mul = mul(val1, val2) + sum_inc = add(sum, mul) + j_size = dynamic_constant(#1) + less = less_than(j_inc, j_size) + if = if(loop, less) + if_false = read_prod(if, 0) + if_true = read_prod(if, 1) + k_join = join(if_false, sum_inc, #2) + k_join_ctrl = read_prod(k_join, 0) + k_join_data = read_prod(k_join, 1) + i_join = join(k_join_ctrl, k_join_data, #0) + i_join_ctrl = read_prod(i_join, 0) + i_join_data = read_prod(i_join, 1) + r = return(i_join_ctrl, i_join_data) diff --git a/samples/simple1.hir b/samples/simple1.hir index 71c3e3bc..415b2bc3 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -8,35 +8,4 @@ fn add<1>(x: i32, y: i32) -> i32 r = return(start, s) w = add(z, c) s = add(w, dc) - z = add(x, y) - -fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) - i = fork(start, #0) - i_ctrl = read_prod(i, 0) - i_idx = read_prod(i, 1) - k = fork(i_ctrl, #2) - k_ctrl = read_prod(k, 0) - k_idx = read_prod(k, 1) - zero_idx = constant(u64, 0) - one_idx = constant(u64, 1) - zero_val = constant(f32, 0) - loop = region(k_ctrl, if_true) - j = phi(loop, zero_idx, j_inc) - sum = phi(loop, zero_val, sum_inc) - j_inc = add(j, one_idx) - val1 = read_array(a, i_idx, j) - val2 = read_array(b, j, k_idx) - mul = mul(val1, val2) - sum_inc = add(sum, mul) - j_size = dynamic_constant(#1) - less = 
less_than(j_inc, j_size) - if = if(loop, less) - if_false = read_prod(if, 0) - if_true = read_prod(if, 1) - k_join = join(if_false, sum_inc, #2) - k_join_ctrl = read_prod(k_join, 0) - k_join_data = read_prod(k_join, 1) - i_join = join(k_join_ctrl, k_join_data, #0) - i_join_ctrl = read_prod(i_join, 0) - i_join_data = read_prod(i_join, 1) - r = return(i_join_ctrl, i_join_data) \ No newline at end of file + z = add(x, y) \ No newline at end of file -- GitLab From efdc1bf5978d39474481e24ac0f73a952f8649e5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 12 Sep 2023 14:19:31 -0500 Subject: [PATCH 011/105] Multidim arrays must be nested internally --- hercules_ir/src/dot.rs | 24 +++---- hercules_ir/src/ir.rs | 6 +- hercules_ir/src/parse.rs | 150 +++++++++------------------------------ samples/matmul.hir | 8 ++- 4 files changed, 54 insertions(+), 134 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index f8ebd118..f2619d2b 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -58,7 +58,11 @@ fn write_node<W: std::fmt::Write>( for (idx, pred) in preds.iter().enumerate() { let (pred_name, tmp_visited) = write_node(i, pred.idx(), module, visited, w)?; visited = tmp_visited; - write!(w, "{} -> {} [label=\"pred {}\"];\n", pred_name, name, idx)?; + write!( + w, + "{} -> {} [label=\"pred {}\", style=\"dashed\"];\n", + pred_name, name, idx + )?; } visited } @@ -244,26 +248,20 @@ fn write_node<W: std::fmt::Write>( } Node::ReadArray { array, index } => { write!(w, "{} [label=\"read_array\"];\n", name)?; - let (array_name, mut visited) = write_node(i, array.idx(), module, visited, w)?; + let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; - for (idx, index) in index.iter().enumerate() { - let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; - visited = tmp_visited; - write!(w, "{} -> {} [label=\"index 
{}\"];\n", index_name, name, idx)?; - } + let (index_name, visited) = write_node(i, index.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"index\"];\n", index_name, name)?; visited } Node::WriteArray { array, data, index } => { write!(w, "{} [label=\"write_array\"];\n", name)?; let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; - let (data_name, mut visited) = write_node(i, data.idx(), module, visited, w)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; - for (idx, index) in index.iter().enumerate() { - let (index_name, tmp_visited) = write_node(i, index.idx(), module, visited, w)?; - visited = tmp_visited; - write!(w, "{} -> {} [label=\"index {}\"];\n", index_name, name, idx)?; - } + let (index_name, visited) = write_node(i, index.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"index\"];\n", index_name, name)?; visited } Node::Match { control, sum } => { diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 090e1561..311cb8cf 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -32,7 +32,7 @@ pub enum Type { Float64, Product(Box<[TypeID]>), Summation(Box<[TypeID]>), - Array(TypeID, Box<[DynamicConstantID]>), + Array(TypeID, DynamicConstantID), } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -130,12 +130,12 @@ pub enum Node { }, ReadArray { array: NodeID, - index: Box<[NodeID]>, + index: NodeID, }, WriteArray { array: NodeID, data: NodeID, - index: Box<[NodeID]>, + index: NodeID, }, Match { control: NodeID, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 205b1c8a..39b29421 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -449,22 +449,9 @@ fn parse_read_array<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { - let (ir_text, (array, index)) = parse_tuple2( - 
parse_identifier, - nom::multi::separated_list1( - nom::sequence::tuple(( - nom::character::complete::multispace0, - nom::character::complete::char(','), - nom::character::complete::multispace0, - )), - parse_identifier, - ), - )(ir_text)?; + let (ir_text, (array, index)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let array = context.borrow_mut().get_node_id(array); - let index = index - .into_iter() - .map(|x| context.borrow_mut().get_node_id(x)) - .collect(); + let index = context.borrow_mut().get_node_id(index); Ok((ir_text, Node::ReadArray { array, index })) } @@ -472,24 +459,11 @@ fn parse_write_array<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { - let (ir_text, (array, data, index)) = parse_tuple3( - parse_identifier, - parse_identifier, - nom::multi::separated_list1( - nom::sequence::tuple(( - nom::character::complete::multispace0, - nom::character::complete::char(','), - nom::character::complete::multispace0, - )), - parse_identifier, - ), - )(ir_text)?; + let (ir_text, (array, data, index)) = + parse_tuple3(parse_identifier, parse_identifier, parse_identifier)(ir_text)?; let array = context.borrow_mut().get_node_id(array); let data = context.borrow_mut().get_node_id(data); - let index = index - .into_iter() - .map(|x| context.borrow_mut().get_node_id(x)) - .collect(); + let index = context.borrow_mut().get_node_id(index); Ok((ir_text, Node::WriteArray { array, data, index })) } @@ -611,20 +585,11 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes nom::character::complete::multispace0, nom::character::complete::char(','), nom::character::complete::multispace0, - nom::multi::separated_list1( - nom::sequence::tuple(( - nom::character::complete::multispace0, - nom::character::complete::char(','), - nom::character::complete::multispace0, - )), - |x| parse_dynamic_constant_id(x, context), - ), + |x| parse_dynamic_constant_id(x, context), 
nom::character::complete::multispace0, nom::character::complete::char(')'), )), - |(_, _, _, _, ty_id, _, _, _, dc_ids, _, _)| { - Type::Array(ty_id, dc_ids.into_boxed_slice()) - }, + |(_, _, _, _, ty_id, _, _, _, dc_id, _, _)| Type::Array(ty_id, dc_id), ), ))(ir_text)?; Ok((ir_text, ty)) @@ -700,11 +665,11 @@ fn parse_constant<'a>( tys, context, )?, - Type::Array(elem_ty, dc_bounds) => parse_array_constant( + Type::Array(elem_ty, dc_bound) => parse_array_constant( ir_text, context.borrow_mut().get_type_id(ty.clone()), elem_ty, - dc_bounds, + dc_bound, context, )?, }; @@ -846,84 +811,39 @@ fn parse_array_constant<'a>( ir_text: &'a str, array_ty: TypeID, elem_ty: TypeID, - dc_bounds: Box<[DynamicConstantID]>, + dc_bound: DynamicConstantID, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Constant> { - let mut bounds = vec![]; - let borrow = context.borrow(); - let mut total_elems = 1; - for dc in dc_bounds.iter() { - let dc = borrow.reverse_dynamic_constant_map.get(dc).unwrap(); - match dc { - DynamicConstant::Constant(b) => { - if *b == 0 { - Err(nom::Err::Error(nom::error::Error { - input: ir_text, - code: nom::error::ErrorKind::IsNot, - }))? 
- } - total_elems *= b; - bounds.push(*b); - } - _ => Err(nom::Err::Error(nom::error::Error { - input: ir_text, - code: nom::error::ErrorKind::IsNot, - }))?, - } - } - let mut contents = vec![]; - let ir_text = - parse_array_constant_helper(ir_text, elem_ty, bounds.as_slice(), &mut contents, context)?.0; + let ir_text = nom::character::complete::multispace0(ir_text)?.0; + let ir_text = nom::character::complete::char('[')(ir_text)?.0; + let ir_text = nom::character::complete::multispace0(ir_text)?.0; + let (ir_text, entries) = nom::multi::separated_list1( + nom::sequence::tuple(( + nom::character::complete::multispace0, + nom::character::complete::char(','), + nom::character::complete::multispace0, + )), + |x| { + parse_constant_id( + x, + context + .borrow() + .reverse_type_map + .get(&elem_ty) + .unwrap() + .clone(), + context, + ) + }, + )(ir_text)?; + let ir_text = nom::character::complete::multispace0(ir_text)?.0; + let ir_text = nom::character::complete::char(']')(ir_text)?.0; Ok(( ir_text, - Constant::Array(array_ty, contents.into_boxed_slice()), + Constant::Array(elem_ty, entries.into_boxed_slice()), )) } -fn parse_array_constant_helper<'a>( - ir_text: &'a str, - elem_ty: TypeID, - bounds: &[usize], - contents: &mut Vec<ConstantID>, - context: &RefCell<Context<'a>>, -) -> nom::IResult<&'a str, ()> { - if bounds.len() > 0 { - let ir_text = nom::character::complete::multispace0(ir_text)?.0; - let ir_text = nom::character::complete::char('[')(ir_text)?.0; - let ir_text = nom::character::complete::multispace0(ir_text)?.0; - let (ir_text, empties) = nom::multi::separated_list1( - nom::sequence::tuple(( - nom::character::complete::multispace0, - nom::character::complete::char(','), - nom::character::complete::multispace0, - )), - |x| parse_array_constant_helper(x, elem_ty, bounds, contents, context), - )(ir_text)?; - if empties.len() != bounds[0] { - Err(nom::Err::Error(nom::error::Error { - input: ir_text, - code: nom::error::ErrorKind::IsNot, - }))? 
- } - let ir_text = nom::character::complete::multispace0(ir_text)?.0; - let ir_text = nom::character::complete::char(']')(ir_text)?.0; - Ok((ir_text, ())) - } else { - let (ir_text, id) = parse_constant_id( - ir_text, - context - .borrow() - .reverse_type_map - .get(&elem_ty) - .unwrap() - .clone(), - context, - )?; - contents.push(id); - Ok((ir_text, ())) - } -} - fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { nom::combinator::verify( nom::bytes::complete::is_a( diff --git a/samples/matmul.hir b/samples/matmul.hir index 3a7f34ae..fe9975e9 100644 --- a/samples/matmul.hir +++ b/samples/matmul.hir @@ -1,4 +1,4 @@ -fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) +fn matmul<3>(a: array(array(f32, #1), #0), b: array(array(f32, #2), #1)) -> array(array(f32, #2), #0) i = fork(start, #0) i_ctrl = read_prod(i, 0) i_idx = read_prod(i, 1) @@ -12,8 +12,10 @@ fn matmul<3>(a: array(f32, #0, #1), b: array(f32, #1, #2)) -> array(f32, #0, #2) j = phi(loop, zero_idx, j_inc) sum = phi(loop, zero_val, sum_inc) j_inc = add(j, one_idx) - val1 = read_array(a, i_idx, j) - val2 = read_array(b, j, k_idx) + fval1 = read_array(a, i_idx) + fval2 = read_array(b, j) + val1 = read_array(fval1, j) + val2 = read_array(fval2, k_idx) mul = mul(val1, val2) sum_inc = add(sum, mul) j_size = dynamic_constant(#1) -- GitLab From 91ae8095f35b6e5ca2651062844ef8716faef9a7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 13 Sep 2023 10:11:34 -0500 Subject: [PATCH 012/105] Comment ir.rs --- hercules_ir/src/ir.rs | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 311cb8cf..26992989 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -1,5 +1,11 @@ extern crate ordered_float; +/* + * A module is a list of functions. 
Functions contain types, constants, and + * dynamic constants, which are interned at the module level. Thus, if one + * wants to run an intraprocedural pass in parallel, it is advised to first + * destruct the module, then reconstruct it once finished. + */ #[derive(Debug, Clone)] pub struct Module { pub functions: Vec<Function>, @@ -8,6 +14,14 @@ pub struct Module { pub dynamic_constants: Vec<DynamicConstant>, } +/* + * A function has a name, a list of types for its parameters, a single return + * type, a list of nodes in its sea-of-nodes style IR, and a number of dynamic + * constants. When calling a function, arguments matching the parameter types + * are required, as well as the correct number of dynamic constants. All + * dynamic constants are 64-bit unsigned integers (usize / u64), so it is + * sufficient to merely store how many of them the function takes as arguments. + */ #[derive(Debug, Clone)] pub struct Function { pub name: String, @@ -17,6 +31,19 @@ pub struct Function { pub num_dynamic_constants: u32, } +/* + * Hercules IR has a fairly standard type system, with the exception of the + * control type. Hercules IR is based off of the sea-of-nodes IR, the main + * feature of which being a merged control and data flow graph. Thus, control + * is a type of value, just like any other type. However, the type system is + * very restrictive over what can be done with control values. A novel addition + * in Hercules IR is that a control type is parameterized by its thread count. + * This is the mechanism in Hercules IR for representing parallelism. Summation + * types are an IR equivalent of Rust's enum types. These are lowered into + * tagged unions during scheduling. Array types are one-dimensional. Multi- + * dimensional arrays are represented by nesting array types. An array extent + * is represented with a dynamic constant. 
+ */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { Control(DynamicConstantID), @@ -35,6 +62,14 @@ pub enum Type { Array(TypeID, DynamicConstantID), } +/* + * Constants are pretty standard in Hercules IR. Float constants used the + * ordered_float crate so that constants can be keys in maps (used for + * interning constants during IR construction). Product, summation, and array + * constants all contain their own type. This is only strictly necessary for + * summation types, but provides a nice mechanism for sanity checking for + * product and array types as well. + */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Constant { Integer8(i8), @@ -52,12 +87,34 @@ pub enum Constant { Array(TypeID, Box<[ConstantID]>), } +/* + * Dynamic constants are unsigned 64-bit integers passed to a Hercules function + * at runtime using the Hercules runtime API. They cannot be the result of + * computations in Hercules IR. For a single execution of a Hercules function, + * dynamic constants are constant throughout execution. This provides a + * mechanism by which Hercules functions can operate on arrays with variable + * length, while not needing Hercules functions to perform dynamic memory + * allocation - by providing dynamic constants to the runtime API, the runtime + * can allocate memory as necessary. + */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DynamicConstant { Constant(usize), Parameter(usize), } +/* + * Hercules IR is a combination of a possibly cylic control flow graph, and + * many acyclic data flow graphs. Each node represents some operation on input + * values (including control), and produces some output value. Operations that + * conceptually produce multiple outputs (such as an if node) produce a product + * type instead. For example, the if node produces prod(control(N), + * control(N)), where the first control token represents the false branch, and + * the second control token represents the true branch. 
Another example is the + * fork node, which produces prod(control(N*k), u64), where the u64 is the + * thread ID. Functions are devoid of side effects, so call nodes don't take as + * input or output control tokens. There is also no global memory - use arrays. + */ #[derive(Debug, Clone)] pub enum Node { Start, @@ -148,6 +205,9 @@ pub enum Node { }, } +/* + * Rust things to make newtyped IDs usable. + */ #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct FunctionID(u32); -- GitLab From 371b692ec5648dbab916c8c48db8254f0ba06718 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 13 Sep 2023 11:02:44 -0500 Subject: [PATCH 013/105] Comment parse.rs --- hercules_ir/src/dot.rs | 2 +- hercules_ir/src/parse.rs | 144 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 134 insertions(+), 12 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index f2619d2b..8d97e77d 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -142,7 +142,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::Parameter { index } => { - write!(w, "{} [label=\"param #{}\"];\n", name, index)?; + write!(w, "{} [label=\"param #{}\"];\n", name, index + 1)?; visited } Node::Constant { id } => { diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 39b29421..4c91bc28 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -6,10 +6,21 @@ use std::str::FromStr; use crate::*; +/* + * Top level parse function. + */ pub fn parse(ir_test: &str) -> Module { parse_module(ir_test, Context::default()).unwrap().1 } +/* + * This is a context sensitive parser. We parse directly into the graph data + * structure inside ir::Module, so this is where we perform interning. + * We intern function names, node names, types, constants, and dynamic + * constants. Sometimes, types and dynamic constants need to be looked up, so + * we also maintain reverse intern maps for that purpose. 
IDs are assigned + * in increasing order, based on the intern map's size. + */ #[derive(Default)] struct Context<'a> { function_ids: HashMap<&'a str, FunctionID>, @@ -21,6 +32,10 @@ struct Context<'a> { reverse_dynamic_constant_map: HashMap<DynamicConstantID, DynamicConstant>, } +/* + * Interning functions. In general, all modifications to intern maps should be + * done through these functions. + */ impl<'a> Context<'a> { fn get_function_id(&mut self, name: &'a str) -> FunctionID { if let Some(id) = self.function_ids.get(name) { @@ -77,13 +92,22 @@ impl<'a> Context<'a> { } } +/* + * A module is just a file with a list of functions. + */ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a str, Module> { let context = RefCell::new(context); + + // If there is any text left after successfully parsing some functions, + // treat that as an error. let (rest, functions) = nom::combinator::all_consuming(nom::multi::many0(|x| parse_function(x, &context)))( ir_text, )?; let mut context = context.into_inner(); + + // functions, as returned by parsing, is in parse order, which may differ + // from the order dictated by FunctionIDs in the function name intern map. let mut fixed_functions = vec![ Function { name: String::from(""), @@ -96,9 +120,15 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a ]; for function in functions { let function_name = function.name.clone(); + + // We can remove items from context now, as it's going to be destroyed + // anyway. let function_id = context.function_ids.remove(function_name.as_str()).unwrap(); fixed_functions[function_id.idx()] = function; } + + // Assemble flat lists of interned goodies, now that we've figured out + // everyones' IDs. 
let mut types = vec![Type::Control(DynamicConstantID::new(0)); context.interned_types.len()]; for (ty, id) in context.interned_types { types[id.idx()] = ty; @@ -121,11 +151,20 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a Ok((rest, module)) } +/* + * A function is a function declaration, followed by a list of node statements. + */ fn parse_function<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Function> { + // Each function contains its own list of interned nodes, so we need to + // clear the node name intern map. context.borrow_mut().node_ids.clear(); + + // This parser isn't split into lexing and parsing steps. So, we very + // frequently need to eat whitespace. Is this ugly? Yes. Does it work? Also + // yes. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::bytes::complete::tag("fn")(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -141,6 +180,8 @@ fn parse_function<'a>( }; let (ir_text, num_dynamic_constants) = nom::combinator::opt(parse_num_dynamic_constants)(ir_text)?; + + // If unspecified, assumed function has no dynamic constant arguments. let num_dynamic_constants = num_dynamic_constants.unwrap_or(0); let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('(')(ir_text)?.0; @@ -156,29 +197,49 @@ fn parse_function<'a>( nom::character::complete::multispace0, )), )(ir_text)?; - context - .borrow_mut() - .node_ids - .insert("start", NodeID::new(0)); + + // The start node is not explicitly specified in the textual IR, so create + // it manually. + context.borrow_mut().get_node_id("start"); + + // Insert nodes for each parameter. 
for param in params.iter() { - let id = NodeID::new(context.borrow().node_ids.len()); - context.borrow_mut().node_ids.insert(param.1, id); + context.borrow_mut().get_node_id(param.1); } let ir_text = nom::character::complete::char(')')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::bytes::complete::tag("->")(ir_text)?.0; let (ir_text, return_type) = parse_type_id(ir_text, context)?; let (ir_text, nodes) = nom::multi::many1(|x| parse_node(x, context))(ir_text)?; + + // nodes, as returned by parsing, is in parse order, which may differ from + // the order dictated by NodeIDs in the node name intern map. let mut fixed_nodes = vec![Node::Start; context.borrow().node_ids.len()]; for (name, node) in nodes { + // We can remove items from the node name intern map now, as the map + // will be cleared during the next iteration of parse_function. fixed_nodes[context.borrow_mut().node_ids.remove(name).unwrap().idx()] = node; } + + // The nodes removed from node_ids in the previous step are nodes that are + // defined in statements parsed by parse_node. There are 2 kinds of nodes + // that aren't defined in statements inside the function body: the start + // node, and the parameter nodes. The node at ID 0 is already a start node, + // by the initialization of fixed_nodes. Here, we set the other nodes to + // parameter nodes. The node id in node_ids corresponds to the parameter + // index + 1, because in parse_function, we add the parameter names to + // node_ids (a.k.a. the node name intern map) in order, after adding the + // start node. for (_, id) in context.borrow().node_ids.iter() { if id.idx() != 0 { - fixed_nodes[id.idx()] = Node::Parameter { index: id.idx() } + fixed_nodes[id.idx()] = Node::Parameter { + index: id.idx() - 1, + } } } let ir_text = nom::character::complete::multispace0(ir_text)?.0; + + // Intern function name. 
context.borrow_mut().get_function_id(function_name); Ok(( ir_text, @@ -192,6 +253,10 @@ fn parse_function<'a>( )) } +/* + * A node is a statement of the form a = b(c), where a is the name of the output + * of the node, b is the node type, and c is a list of arguments. + */ fn parse_node<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, @@ -228,6 +293,8 @@ fn parse_node<'a>( code: nom::error::ErrorKind::IsNot, }))?, }; + + // Intern node name. context.borrow_mut().get_node_id(node_name); Ok((ir_text, (node_name, node))) } @@ -236,6 +303,13 @@ fn parse_region<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { + // Each of these parse node functions are very similar. The node name and + // type have already been parsed, so here we just parse the node's + // arguments. These are always in between parantheses and separated by + // commas, so there are parse_tupleN utility functions that do this. If + // there is a variable amount of arguments, then we need to represent that + // explicitly using nom's separated list functionality. This example here + // is a bit of an abuse of what parse_tupleN functions are meant for. let (ir_text, (preds,)) = parse_tuple1(nom::multi::separated_list1( nom::sequence::tuple(( nom::character::complete::multispace0, @@ -244,6 +318,9 @@ fn parse_region<'a>( )), parse_identifier, ))(ir_text)?; + + // When the parsed arguments are node names, we need to look up their ID in + // the node name intern map. let preds = preds .into_iter() .map(|x| context.borrow_mut().get_node_id(x)) @@ -262,6 +339,9 @@ fn parse_fork<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes let (ir_text, (control, factor)) = parse_tuple2(parse_identifier, |x| parse_dynamic_constant_id(x, context))(ir_text)?; let control = context.borrow_mut().get_node_id(control); + + // Because parse_dynamic_constant_id returned a DynamicConstantID directly, + // we don't need to manually convert it here. 
Ok((ir_text, Node::Fork { control, factor })) } @@ -316,6 +396,8 @@ fn parse_constant_node<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { + // Here, we don't use parse_tuple2 because there is a dependency between + // the parse functions of the 2 arguments. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('(')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -376,6 +458,10 @@ fn parse_less_than<'a>( } fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + // Call nodes are a bit complicated because they 1. optionally take dynamic + // constants as "arguments" (though these are specified between <>), 2. + // take a function name as an argument, and 3. take a variable number of + // normal arguments. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let parse_dynamic_constants = |ir_text: &'a str| -> nom::IResult<&'a str, Vec<DynamicConstantID>> { @@ -508,8 +594,11 @@ fn parse_build_sum<'a>( } fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Type> { + // Parser combinators are very convenient, if a bit hard to read. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, ty) = nom::branch::alt(( + // Control tokens are parameterized by a dynamic constant representing + // their thread count. nom::combinator::map( nom::sequence::tuple(( nom::bytes::complete::tag("ctrl"), @@ -521,6 +610,7 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes )), |(_, _, _, id, _, _)| Type::Control(id), ), + // Primitive types are written in Rust style. 
nom::combinator::map(nom::bytes::complete::tag("i8"), |_| Type::Integer8), nom::combinator::map(nom::bytes::complete::tag("i16"), |_| Type::Integer16), nom::combinator::map(nom::bytes::complete::tag("i32"), |_| Type::Integer32), @@ -537,6 +627,7 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes }), nom::combinator::map(nom::bytes::complete::tag("f32"), |_| Type::Float32), nom::combinator::map(nom::bytes::complete::tag("f64"), |_| Type::Float64), + // Product types are parsed as a list of their element types. nom::combinator::map( nom::sequence::tuple(( nom::bytes::complete::tag("prod"), @@ -556,6 +647,7 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes )), |(_, _, _, _, ids, _, _)| Type::Product(ids.into_boxed_slice()), ), + // Sum types are parsed as a list of their variant types. nom::combinator::map( nom::sequence::tuple(( nom::bytes::complete::tag("sum"), @@ -575,6 +667,8 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes )), |(_, _, _, _, ids, _, _)| Type::Summation(ids.into_boxed_slice()), ), + // Array types are just a pair between an element type and a dynamic + // constant representing its extent. nom::combinator::map( nom::sequence::tuple(( nom::bytes::complete::tag("array"), @@ -595,6 +689,9 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes Ok((ir_text, ty)) } +// For types, constants, and dynamic constant parse functions, there is a +// variant parsing the object itself, and a variant that parses the object and +// returns the interned ID. fn parse_dynamic_constant_id<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, @@ -613,6 +710,8 @@ fn parse_dynamic_constant<'a>(ir_text: &'a str) -> nom::IResult<&'a str, Dynamic |x| parse_prim::<usize>(x, "1234567890"), |x| DynamicConstant::Constant(x), ), + // Parameter dynamic constants of a function are written by preprending + // a '#' to the parameter's number. 
nom::combinator::map( nom::sequence::tuple((nom::character::complete::char('#'), |x| { parse_prim::<usize>(x, "1234567890") @@ -633,12 +732,20 @@ fn parse_constant_id<'a>( Ok((ir_text, id)) } +/* + * parse_constant requires a type argument so that we know what we're parsing + * upfront. Not having this would make parsing primitive constants much harder. + * This is a bad requirement to have for a source language, but for a verbose + * textual format for an IR, it's fine and simplifies the parser, typechecking, + * and the IR itself. + */ fn parse_constant<'a>( ir_text: &'a str, ty: Type, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Constant> { let (ir_text, constant) = match ty.clone() { + // There are not control constants. Type::Control(_) => Err(nom::Err::Error(nom::error::Error { input: ir_text, code: nom::error::ErrorKind::IsNot, @@ -665,11 +772,10 @@ fn parse_constant<'a>( tys, context, )?, - Type::Array(elem_ty, dc_bound) => parse_array_constant( + Type::Array(elem_ty, _) => parse_array_constant( ir_text, context.borrow_mut().get_type_id(ty.clone()), elem_ty, - dc_bound, context, )?, }; @@ -677,6 +783,9 @@ fn parse_constant<'a>( Ok((ir_text, constant)) } +/* + * Utility for parsing types implementing FromStr. + */ fn parse_prim<'a, T: FromStr>(ir_text: &'a str, chars: &'static str) -> nom::IResult<&'a str, T> { let (ir_text, x_text) = nom::bytes::complete::is_a(chars)(ir_text)?; let x = x_text.parse::<T>().map_err(|_| { @@ -755,6 +864,8 @@ fn parse_product_constant<'a>( let ir_text = nom::character::complete::multispace0(ir_text)?.0; let mut ir_text = nom::character::complete::char('(')(ir_text)?.0; let mut subconstants = vec![]; + + // There should be one constant for each element type. 
for ty in tys.iter() { if !subconstants.is_empty() { ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -788,6 +899,8 @@ fn parse_summation_constant<'a>( let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char('(')(ir_text)?.0; let ir_text = nom::character::complete::multispace0(ir_text)?.0; + + // Sum constants need to specify their variant number. let (ir_text, variant) = parse_prim::<u32>(ir_text, "1234567890")?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(',')(ir_text)?.0; @@ -811,7 +924,6 @@ fn parse_array_constant<'a>( ir_text: &'a str, array_ty: TypeID, elem_ty: TypeID, - dc_bound: DynamicConstantID, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Constant> { let ir_text = nom::character::complete::multispace0(ir_text)?.0; @@ -838,13 +950,17 @@ fn parse_array_constant<'a>( )(ir_text)?; let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(']')(ir_text)?.0; + + // Check that entries is the correct size during typechecking. Ok(( ir_text, - Constant::Array(elem_ty, entries.into_boxed_slice()), + Constant::Array(array_ty, entries.into_boxed_slice()), )) } fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { + // Here's the set of characters that can be in an identifier. Must be + // non-empty. nom::combinator::verify( nom::bytes::complete::is_a( "1234567890_@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", @@ -853,6 +969,9 @@ fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { )(ir_text) } +/* + * Helper function for parsing tuples of arguments in the textual format. 
+ */ fn parse_tuple1<'a, A, AF>(mut parse_a: AF) -> impl FnMut(&'a str) -> nom::IResult<&'a str, (A,)> where AF: nom::Parser<&'a str, A, nom::error::Error<&'a str>>, @@ -920,6 +1039,9 @@ where } } +/* + * Some tests that demonstrate what the textual format looks like. + */ mod tests { #[allow(unused_imports)] use super::*; -- GitLab From 17ff4e85f05c7558239ec9e6b4e9774807efa198 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 14 Sep 2023 13:54:18 -0500 Subject: [PATCH 014/105] Control token has explicit list of thread spawn factors --- hercules_ir/src/dot.rs | 14 ++------------ hercules_ir/src/ir.rs | 15 +++++++-------- hercules_ir/src/parse.rs | 36 +++++++++++++++++++----------------- samples/matmul.hir | 4 ++-- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 8d97e77d..6df6e7e3 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -93,17 +93,8 @@ fn write_node<W: std::fmt::Write>( )?; visited } - Node::Join { - control, - data, - factor, - } => { - write!( - w, - "{} [label=\"join<{:?}>\"];\n", - name, - module.dynamic_constants[factor.idx()] - )?; + Node::Join { control, data } => { + write!(w, "{} [label=\"join\"];\n", name,)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!( @@ -312,7 +303,6 @@ fn get_string_node_kind(node: &Node) -> &'static str { Node::Join { control: _, data: _, - factor: _, } => "join", Node::Phi { control: _, diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 26992989..9f4fb3a8 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -37,16 +37,16 @@ pub struct Function { * feature of which being a merged control and data flow graph. Thus, control * is a type of value, just like any other type. However, the type system is * very restrictive over what can be done with control values. 
A novel addition - * in Hercules IR is that a control type is parameterized by its thread count. - * This is the mechanism in Hercules IR for representing parallelism. Summation - * types are an IR equivalent of Rust's enum types. These are lowered into - * tagged unions during scheduling. Array types are one-dimensional. Multi- - * dimensional arrays are represented by nesting array types. An array extent - * is represented with a dynamic constant. + * in Hercules IR is that a control type is parameterized by a list of thread + * spawning factors. This is the mechanism in Hercules IR for representing + * parallelism. Summation types are an IR equivalent of Rust's enum types. + * These are lowered into tagged unions during scheduling. Array types are one- + * dimensional. Multi-dimensional arrays are represented by nesting array types. + * An array extent is represented with a dynamic constant. */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { - Control(DynamicConstantID), + Control(Box<[DynamicConstantID]>), Integer8, Integer16, Integer32, @@ -132,7 +132,6 @@ pub enum Node { Join { control: NodeID, data: NodeID, - factor: DynamicConstantID, }, Phi { control: NodeID, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 4c91bc28..26c9eb59 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -129,7 +129,7 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a // Assemble flat lists of interned goodies, now that we've figured out // everyones' IDs. 
- let mut types = vec![Type::Control(DynamicConstantID::new(0)); context.interned_types.len()]; + let mut types = vec![Type::Control(Box::new([])); context.interned_types.len()]; for (ty, id) in context.interned_types { types[id.idx()] = ty; } @@ -346,20 +346,10 @@ fn parse_fork<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes } fn parse_join<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (control, data, factor)) = - parse_tuple3(parse_identifier, parse_identifier, |x| { - parse_dynamic_constant_id(x, context) - })(ir_text)?; + let (ir_text, (control, data)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let control = context.borrow_mut().get_node_id(control); let data = context.borrow_mut().get_node_id(data); - Ok(( - ir_text, - Node::Join { - control, - data, - factor, - }, - )) + Ok((ir_text, Node::Join { control, data })) } fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { @@ -597,19 +587,31 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes // Parser combinators are very convenient, if a bit hard to read. let ir_text = nom::character::complete::multispace0(ir_text)?.0; let (ir_text, ty) = nom::branch::alt(( - // Control tokens are parameterized by a dynamic constant representing - // their thread count. + // Control tokens are parameterized by a list of dynamic constants + // representing their thread spawn factors. 
nom::combinator::map( nom::sequence::tuple(( nom::bytes::complete::tag("ctrl"), nom::character::complete::multispace0, nom::character::complete::char('('), - |x| parse_dynamic_constant_id(x, context), + nom::character::complete::multispace0, + nom::multi::separated_list1( + nom::sequence::tuple(( + nom::character::complete::multispace0, + nom::character::complete::char(','), + nom::character::complete::multispace0, + )), + |x| parse_dynamic_constant_id(x, context), + ), nom::character::complete::multispace0, nom::character::complete::char(')'), )), - |(_, _, _, id, _, _)| Type::Control(id), + |(_, _, _, _, id, _, _)| Type::Control(id.into_boxed_slice()), ), + // If no arguments are provided, assumed that no forks have occurred. + nom::combinator::map(nom::bytes::complete::tag("ctrl"), |_| { + Type::Control(Box::new([])) + }), // Primitive types are written in Rust style. nom::combinator::map(nom::bytes::complete::tag("i8"), |_| Type::Integer8), nom::combinator::map(nom::bytes::complete::tag("i16"), |_| Type::Integer16), diff --git a/samples/matmul.hir b/samples/matmul.hir index fe9975e9..f1e11a43 100644 --- a/samples/matmul.hir +++ b/samples/matmul.hir @@ -23,10 +23,10 @@ fn matmul<3>(a: array(array(f32, #1), #0), b: array(array(f32, #2), #1)) -> arra if = if(loop, less) if_false = read_prod(if, 0) if_true = read_prod(if, 1) - k_join = join(if_false, sum_inc, #2) + k_join = join(if_false, sum_inc) k_join_ctrl = read_prod(k_join, 0) k_join_data = read_prod(k_join, 1) - i_join = join(k_join_ctrl, k_join_data, #0) + i_join = join(k_join_ctrl, k_join_data) i_join_ctrl = read_prod(i_join, 0) i_join_data = read_prod(i_join, 1) r = return(i_join_ctrl, i_join_data) -- GitLab From 5895381e6434c2a9e3ba97e68ad27276ecbea83c Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 14 Sep 2023 14:04:35 -0500 Subject: [PATCH 015/105] Add rem/lte/gt/gte ops --- hercules_ir/src/dot.rs | 42 +++++++++++++++++++++++++++++++++++++--- hercules_ir/src/ir.rs | 18 
++++++++++++++++- hercules_ir/src/parse.rs | 41 +++++++++++++++++++++++++++++++++------ samples/matmul.hir | 2 +- 4 files changed, 92 insertions(+), 11 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 6df6e7e3..c3312f5c 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -186,8 +186,40 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; visited } - Node::LessThan { left, right } => { - write!(w, "{} [label=\"less_than\"];\n", name)?; + Node::Rem { left, right } => { + write!(w, "{} [label=\"rem\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; + visited + } + Node::LT { left, right } => { + write!(w, "{} [label=\"lt\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; + visited + } + Node::LTE { left, right } => { + write!(w, "{} [label=\"lte\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; + visited + } + Node::GT { left, right } => { + write!(w, "{} [label=\"gt\"];\n", name)?; + let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; + let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; + write!(w, "{} -> {} [label=\"right\"];\n", 
right_name, name)?; + visited + } + Node::GTE { left, right } => { + write!(w, "{} [label=\"gte\"];\n", name)?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; @@ -319,7 +351,11 @@ fn get_string_node_kind(node: &Node) -> &'static str { Node::Sub { left: _, right: _ } => "sub", Node::Mul { left: _, right: _ } => "mul", Node::Div { left: _, right: _ } => "div", - Node::LessThan { left: _, right: _ } => "less_than", + Node::Rem { left: _, right: _ } => "rem", + Node::LT { left: _, right: _ } => "lt", + Node::LTE { left: _, right: _ } => "lte", + Node::GT { left: _, right: _ } => "gt", + Node::GTE { left: _, right: _ } => "gte", Node::Call { function: _, dynamic_constants: _, diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 9f4fb3a8..c0bf8b29 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -166,7 +166,23 @@ pub enum Node { left: NodeID, right: NodeID, }, - LessThan { + Rem { + left: NodeID, + right: NodeID, + }, + LT { + left: NodeID, + right: NodeID, + }, + LTE { + left: NodeID, + right: NodeID, + }, + GT { + left: NodeID, + right: NodeID, + }, + GTE { left: NodeID, right: NodeID, }, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 26c9eb59..9eab3d9f 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -280,7 +280,11 @@ fn parse_node<'a>( "sub" => parse_sub(ir_text, context)?, "mul" => parse_mul(ir_text, context)?, "div" => parse_div(ir_text, context)?, - "less_than" => parse_less_than(ir_text, context)?, + "rem" => parse_rem(ir_text, context)?, + "lt" => parse_lt(ir_text, context)?, + "lte" => parse_lte(ir_text, context)?, + "gt" => parse_gt(ir_text, context)?, + "gte" => parse_gte(ir_text, context)?, "call" => parse_call(ir_text, context)?, "read_prod" => parse_read_prod(ir_text, context)?, "write_prod" => 
parse_write_prod(ir_text, context)?, @@ -437,14 +441,39 @@ fn parse_div<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResu Ok((ir_text, Node::Div { left, right })) } -fn parse_less_than<'a>( - ir_text: &'a str, - context: &RefCell<Context<'a>>, -) -> nom::IResult<&'a str, Node> { +fn parse_rem<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::Rem { left, right })) +} + +fn parse_lt<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::LT { left, right })) +} + +fn parse_lte<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::LTE { left, right })) +} + +fn parse_gt<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { + let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let left = context.borrow_mut().get_node_id(left); + let right = context.borrow_mut().get_node_id(right); + Ok((ir_text, Node::GT { left, right })) +} + +fn parse_gte<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let left = context.borrow_mut().get_node_id(left); let right = context.borrow_mut().get_node_id(right); - 
Ok((ir_text, Node::LessThan { left, right })) + Ok((ir_text, Node::GTE { left, right })) } fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { diff --git a/samples/matmul.hir b/samples/matmul.hir index f1e11a43..511bdfa8 100644 --- a/samples/matmul.hir +++ b/samples/matmul.hir @@ -19,7 +19,7 @@ fn matmul<3>(a: array(array(f32, #1), #0), b: array(array(f32, #2), #1)) -> arra mul = mul(val1, val2) sum_inc = add(sum, mul) j_size = dynamic_constant(#1) - less = less_than(j_inc, j_size) + less = lt(j_inc, j_size) if = if(loop, less) if_false = read_prod(if, 0) if_true = read_prod(if, 1) -- GitLab From 0c722d6e1b54b902e0295240ac4bb5535d67a976 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 14 Sep 2023 14:35:49 -0500 Subject: [PATCH 016/105] Restructure binary / unary ops to one node type --- hercules_ir/src/dot.rs | 108 +++++++++++++-------------------------- hercules_ir/src/ir.rs | 58 ++++++++++----------- hercules_ir/src/parse.rs | 92 ++++++++++----------------------- 3 files changed, 88 insertions(+), 170 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index c3312f5c..e72f118d 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -154,72 +154,14 @@ fn write_node<W: std::fmt::Write>( )?; visited } - Node::Add { left, right } => { - write!(w, "{} [label=\"add\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::Sub { left, right } => { - write!(w, "{} [label=\"sub\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, 
name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::Mul { left, right } => { - write!(w, "{} [label=\"mul\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::Div { left, right } => { - write!(w, "{} [label=\"div\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::Rem { left, right } => { - write!(w, "{} [label=\"rem\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::LT { left, right } => { - write!(w, "{} [label=\"lt\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::LTE { left, right } => { - write!(w, "{} [label=\"lte\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; - visited - } - Node::GT { left, right } => { - 
write!(w, "{} [label=\"gt\"];\n", name)?; - let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; - let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; - write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; - write!(w, "{} -> {} [label=\"right\"];\n", right_name, name)?; + Node::Unary { input, op } => { + write!(w, "{} [label=\"{}\"];\n", name, get_string_uop_kind(*op))?; + let (input_name, visited) = write_node(i, input.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"input\"];\n", input_name, name)?; visited } - Node::GTE { left, right } => { - write!(w, "{} [label=\"gte\"];\n", name)?; + Node::Binary { left, right, op } => { + write!(w, "{} [label=\"{}\"];\n", name, get_string_bop_kind(*op))?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; @@ -347,15 +289,12 @@ fn get_string_node_kind(node: &Node) -> &'static str { Node::Parameter { index: _ } => "parameter", Node::DynamicConstant { id: _ } => "dynamic_constant", Node::Constant { id: _ } => "constant", - Node::Add { left: _, right: _ } => "add", - Node::Sub { left: _, right: _ } => "sub", - Node::Mul { left: _, right: _ } => "mul", - Node::Div { left: _, right: _ } => "div", - Node::Rem { left: _, right: _ } => "rem", - Node::LT { left: _, right: _ } => "lt", - Node::LTE { left: _, right: _ } => "lte", - Node::GT { left: _, right: _ } => "gt", - Node::GTE { left: _, right: _ } => "gte", + Node::Unary { input: _, op } => get_string_uop_kind(*op), + Node::Binary { + left: _, + right: _, + op, + } => get_string_bop_kind(*op), Node::Call { function: _, dynamic_constants: _, @@ -381,3 +320,26 @@ fn get_string_node_kind(node: &Node) -> &'static str { } => "build_sum", } } + +fn get_string_uop_kind(uop: UnaryOperator) -> &'static str { + match uop { + UnaryOperator::Not => "not", + 
UnaryOperator::Neg => "neg", + } +} + +fn get_string_bop_kind(bop: BinaryOperator) -> &'static str { + match bop { + BinaryOperator::Add => "add", + BinaryOperator::Sub => "sub", + BinaryOperator::Mul => "mul", + BinaryOperator::Div => "div", + BinaryOperator::Rem => "rem", + BinaryOperator::LT => "lt", + BinaryOperator::LTE => "lte", + BinaryOperator::GT => "gt", + BinaryOperator::GTE => "gte", + BinaryOperator::EQ => "eq", + BinaryOperator::NE => "ne", + } +} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index c0bf8b29..639437a6 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -150,41 +150,14 @@ pub enum Node { DynamicConstant { id: DynamicConstantID, }, - Add { - left: NodeID, - right: NodeID, - }, - Sub { - left: NodeID, - right: NodeID, - }, - Mul { - left: NodeID, - right: NodeID, + Unary { + input: NodeID, + op: UnaryOperator, }, - Div { - left: NodeID, - right: NodeID, - }, - Rem { - left: NodeID, - right: NodeID, - }, - LT { - left: NodeID, - right: NodeID, - }, - LTE { - left: NodeID, - right: NodeID, - }, - GT { - left: NodeID, - right: NodeID, - }, - GTE { + Binary { left: NodeID, right: NodeID, + op: BinaryOperator, }, Call { function: FunctionID, @@ -220,6 +193,27 @@ pub enum Node { }, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum UnaryOperator { + Not, + Neg, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum BinaryOperator { + Add, + Sub, + Mul, + Div, + Rem, + LT, + LTE, + GT, + GTE, + EQ, + NE, +} + /* * Rust things to make newtyped IDs usable. 
*/ diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 9eab3d9f..c4cd4ab5 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -276,15 +276,19 @@ fn parse_node<'a>( "return" => parse_return(ir_text, context)?, "constant" => parse_constant_node(ir_text, context)?, "dynamic_constant" => parse_dynamic_constant_node(ir_text, context)?, - "add" => parse_add(ir_text, context)?, - "sub" => parse_sub(ir_text, context)?, - "mul" => parse_mul(ir_text, context)?, - "div" => parse_div(ir_text, context)?, - "rem" => parse_rem(ir_text, context)?, - "lt" => parse_lt(ir_text, context)?, - "lte" => parse_lte(ir_text, context)?, - "gt" => parse_gt(ir_text, context)?, - "gte" => parse_gte(ir_text, context)?, + "not" => parse_unary(ir_text, context, UnaryOperator::Not)?, + "neg" => parse_unary(ir_text, context, UnaryOperator::Neg)?, + "add" => parse_binary(ir_text, context, BinaryOperator::Add)?, + "sub" => parse_binary(ir_text, context, BinaryOperator::Sub)?, + "mul" => parse_binary(ir_text, context, BinaryOperator::Mul)?, + "div" => parse_binary(ir_text, context, BinaryOperator::Div)?, + "rem" => parse_binary(ir_text, context, BinaryOperator::Rem)?, + "lt" => parse_binary(ir_text, context, BinaryOperator::LT)?, + "lte" => parse_binary(ir_text, context, BinaryOperator::LTE)?, + "gt" => parse_binary(ir_text, context, BinaryOperator::GT)?, + "gte" => parse_binary(ir_text, context, BinaryOperator::GTE)?, + "eq" => parse_binary(ir_text, context, BinaryOperator::EQ)?, + "ne" => parse_binary(ir_text, context, BinaryOperator::NE)?, "call" => parse_call(ir_text, context)?, "read_prod" => parse_read_prod(ir_text, context)?, "write_prod" => parse_write_prod(ir_text, context)?, @@ -413,67 +417,25 @@ fn parse_dynamic_constant_node<'a>( Ok((ir_text, Node::DynamicConstant { id })) } -fn parse_add<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, 
parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::Add { left, right })) -} - -fn parse_sub<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::Sub { left, right })) -} - -fn parse_mul<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::Mul { left, right })) -} - -fn parse_div<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::Div { left, right })) -} - -fn parse_rem<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::Rem { left, right })) -} - -fn parse_lt<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::LT { left, right })) -} - -fn parse_lte<'a>(ir_text: &'a str, context: 
&RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::LTE { left, right })) -} - -fn parse_gt<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; - let left = context.borrow_mut().get_node_id(left); - let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::GT { left, right })) +fn parse_unary<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, + op: UnaryOperator, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (input,)) = parse_tuple1(parse_identifier)(ir_text)?; + let input = context.borrow_mut().get_node_id(input); + Ok((ir_text, Node::Unary { input, op })) } -fn parse_gte<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { +fn parse_binary<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, + op: BinaryOperator, +) -> nom::IResult<&'a str, Node> { let (ir_text, (left, right)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let left = context.borrow_mut().get_node_id(left); let right = context.borrow_mut().get_node_id(right); - Ok((ir_text, Node::GTE { left, right })) + Ok((ir_text, Node::Binary { left, right, op })) } fn parse_call<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { -- GitLab From 212d93439a8db8a7b737c62782daaa12aad8b568 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 14 Sep 2023 14:42:55 -0500 Subject: [PATCH 017/105] Add some comments --- hercules_ir/src/parse.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index c4cd4ab5..50613f5c 100644 --- 
a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -276,6 +276,8 @@ fn parse_node<'a>( "return" => parse_return(ir_text, context)?, "constant" => parse_constant_node(ir_text, context)?, "dynamic_constant" => parse_dynamic_constant_node(ir_text, context)?, + // Unary and binary ops are spelled out in the textual format, but we + // parse them into Unary or Binary node kinds. "not" => parse_unary(ir_text, context, UnaryOperator::Not)?, "neg" => parse_unary(ir_text, context, UnaryOperator::Neg)?, "add" => parse_binary(ir_text, context, BinaryOperator::Add)?, @@ -357,6 +359,11 @@ fn parse_join<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes let (ir_text, (control, data)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let control = context.borrow_mut().get_node_id(control); let data = context.borrow_mut().get_node_id(data); + + // A join node doesn't need to explicitly store a join factor. The join + // factor is implicitly stored at the tail of the control token's type + // level list of thread spawn factors. Intuitively, fork pushes to the end + // of this list, while join just pops from the end of this list. Ok((ir_text, Node::Join { control, data })) } @@ -944,7 +951,7 @@ fn parse_array_constant<'a>( let ir_text = nom::character::complete::multispace0(ir_text)?.0; let ir_text = nom::character::complete::char(']')(ir_text)?.0; - // Check that entries is the correct size during typechecking. + // Will check that entries is the correct size during typechecking. Ok(( ir_text, Constant::Array(array_ty, entries.into_boxed_slice()), @@ -952,8 +959,8 @@ fn parse_array_constant<'a>( } fn parse_identifier<'a>(ir_text: &'a str) -> nom::IResult<&'a str, &'a str> { - // Here's the set of characters that can be in an identifier. Must be - // non-empty. + // Here's the set of characters that can be in an identifier. Must be non- + // empty. 
nom::combinator::verify( nom::bytes::complete::is_a( "1234567890_@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", -- GitLab From e5eabf64f080fee9beaf7b7dfbffe60a86c49ed7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 15 Sep 2023 22:10:38 -0500 Subject: [PATCH 018/105] Bitflip, lsh, rsh --- hercules_ir/src/dot.rs | 3 +++ hercules_ir/src/ir.rs | 3 +++ hercules_ir/src/parse.rs | 3 +++ 3 files changed, 9 insertions(+) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index e72f118d..3aa4326b 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -325,6 +325,7 @@ fn get_string_uop_kind(uop: UnaryOperator) -> &'static str { match uop { UnaryOperator::Not => "not", UnaryOperator::Neg => "neg", + UnaryOperator::Bitflip => "bitflip", } } @@ -341,5 +342,7 @@ fn get_string_bop_kind(bop: BinaryOperator) -> &'static str { BinaryOperator::GTE => "gte", BinaryOperator::EQ => "eq", BinaryOperator::NE => "ne", + BinaryOperator::LSh => "lsh", + BinaryOperator::RSh => "rsh", } } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 639437a6..22016e08 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -197,6 +197,7 @@ pub enum Node { pub enum UnaryOperator { Not, Neg, + Bitflip, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -212,6 +213,8 @@ pub enum BinaryOperator { GTE, EQ, NE, + LSh, + RSh, } /* diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 50613f5c..e4a85581 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -280,6 +280,7 @@ fn parse_node<'a>( // parse them into Unary or Binary node kinds. 
"not" => parse_unary(ir_text, context, UnaryOperator::Not)?, "neg" => parse_unary(ir_text, context, UnaryOperator::Neg)?, + "bitflip" => parse_unary(ir_text, context, UnaryOperator::Bitflip)?, "add" => parse_binary(ir_text, context, BinaryOperator::Add)?, "sub" => parse_binary(ir_text, context, BinaryOperator::Sub)?, "mul" => parse_binary(ir_text, context, BinaryOperator::Mul)?, @@ -291,6 +292,8 @@ fn parse_node<'a>( "gte" => parse_binary(ir_text, context, BinaryOperator::GTE)?, "eq" => parse_binary(ir_text, context, BinaryOperator::EQ)?, "ne" => parse_binary(ir_text, context, BinaryOperator::NE)?, + "lsh" => parse_binary(ir_text, context, BinaryOperator::LSh)?, + "rsh" => parse_binary(ir_text, context, BinaryOperator::RSh)?, "call" => parse_call(ir_text, context)?, "read_prod" => parse_read_prod(ir_text, context)?, "write_prod" => parse_write_prod(ir_text, context)?, -- GitLab From b1b0930225128c4bfa6e5644adce5e33513e43bd Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 15 Sep 2023 22:16:32 -0500 Subject: [PATCH 019/105] Top level parse return result --- hercules_ir/src/parse.rs | 9 ++++++--- hercules_tools/src/hercules_dot/main.rs | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index e4a85581..7d3d2e55 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -9,8 +9,10 @@ use crate::*; /* * Top level parse function. 
*/ -pub fn parse(ir_test: &str) -> Module { - parse_module(ir_test, Context::default()).unwrap().1 +pub fn parse(ir_test: &str) -> Result<Module, ()> { + parse_module(ir_test, Context::default()) + .map(|x| x.1) + .map_err(|_| ()) } /* @@ -1063,6 +1065,7 @@ fn add<1>(x: i32, y: i32) -> i32 w = add(z, c) z = add(x, y) ", - ); + ) + .unwrap(); } } diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index a9153689..7db02a7b 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -26,7 +26,8 @@ fn main() { let mut contents = String::new(); file.read_to_string(&mut contents) .expect("PANIC: Unable to read input file contents."); - let module = hercules_ir::parse::parse(&contents); + let module = + hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); if args.output.is_empty() { let mut tmp_path = temp_dir(); tmp_path.push("hercules_dot.dot"); -- GitLab From 79c5008ff69a13e4ce18b45dbc9458f4bb6717a0 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 15 Sep 2023 23:05:38 -0500 Subject: [PATCH 020/105] Begin def_use code --- hercules_ir/src/def_use.rs | 108 +++++++++++++++++++++++++++++++++++++ hercules_ir/src/ir.rs | 3 +- hercules_ir/src/lib.rs | 2 + hercules_ir/src/parse.rs | 2 + 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 hercules_ir/src/def_use.rs diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs new file mode 100644 index 00000000..bba8b7bd --- /dev/null +++ b/hercules_ir/src/def_use.rs @@ -0,0 +1,108 @@ +use crate::*; + +/* + * Custom type for an immutable def_use map. This is a relatively efficient + * storage of def_use edges, requiring 2 heap allocations. 
+ */ +#[derive(Debug, Clone)] +pub struct ImmutableDefUseMap { + first_edges: Vec<u32>, + uses: Vec<NodeID>, +} + +impl ImmutableDefUseMap { + pub fn num_edges(&self, id: NodeID) -> u32 { + if id.idx() + 1 < self.first_edges.len() { + self.first_edges[id.idx() + 1] - self.first_edges[id.idx()] + } else { + self.first_edges.len() as u32 - self.first_edges[id.idx()] + } + } + + pub fn get_use(&self, id: NodeID, n: u32) -> NodeID { + assert!( + n < self.num_edges(id), + "PANIC: Attempted to get use edge #{} from node with only {} use edges.", + n + 1, + self.num_edges(id) + ); + self.uses[(self.first_edges[id.idx()] + n) as usize] + } +} + +/* + * Top level def_use function. + */ +pub fn def_use(function: &Function) -> ImmutableDefUseMap { + todo!() +} + +/* + * Enum for storing uses of node. Using get_uses, one can easily iterate over + * the defs that a node uses. + */ +#[derive(Debug, Clone)] +pub enum NodeUses<'a> { + Zero, + One([NodeID; 1]), + Two([NodeID; 2]), + Three([NodeID; 3]), + Variable(&'a Box<[NodeID]>), + Phi(Box<[NodeID]>), +} + +impl<'a> AsRef<[NodeID]> for NodeUses<'a> { + fn as_ref(&self) -> &[NodeID] { + match self { + NodeUses::Zero => &[], + NodeUses::One(x) => x, + NodeUses::Two(x) => x, + NodeUses::Three(x) => x, + NodeUses::Variable(x) => x, + NodeUses::Phi(x) => x, + } + } +} + +/* + * Construct NodeUses for a Node. 
+ */ +pub fn get_uses<'a>(node: &'a Node) -> NodeUses<'a> { + match node { + Node::Start => NodeUses::Zero, + Node::Region { preds } => NodeUses::Variable(preds), + Node::If { control, cond } => NodeUses::Two([*control, *cond]), + Node::Fork { control, factor: _ } => NodeUses::One([*control]), + Node::Join { control, data } => NodeUses::Two([*control, *data]), + Node::Phi { control, data } => { + let mut uses: Vec<NodeID> = Vec::from(&data[..]); + uses.push(*control); + NodeUses::Phi(uses.into_boxed_slice()) + } + Node::Return { control, value } => NodeUses::Two([*control, *value]), + Node::Parameter { index: _ } => todo!(), + Node::Constant { id: _ } => todo!(), + Node::DynamicConstant { id: _ } => todo!(), + Node::Unary { input, op: _ } => NodeUses::One([*input]), + Node::Binary { left, right, op: _ } => NodeUses::Two([*left, *right]), + Node::Call { + function: _, + dynamic_constants: _, + args, + } => NodeUses::Variable(args), + Node::ReadProd { prod, index: _ } => NodeUses::One([*prod]), + Node::WriteProd { + prod, + data, + index: _, + } => NodeUses::Two([*prod, *data]), + Node::ReadArray { array, index } => NodeUses::Two([*array, *index]), + Node::WriteArray { array, data, index } => NodeUses::Three([*array, *data, *index]), + Node::Match { control, sum } => NodeUses::Two([*control, *sum]), + Node::BuildSum { + data, + sum_ty: _, + variant: _, + } => NodeUses::One([*data]), + } +} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 22016e08..1734548e 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -28,6 +28,7 @@ pub struct Function { pub param_types: Vec<TypeID>, pub return_type: TypeID, pub nodes: Vec<Node>, + pub node_types: Option<Vec<TypeID>>, pub num_dynamic_constants: u32, } @@ -111,7 +112,7 @@ pub enum DynamicConstant { * type instead. For example, the if node produces prod(control(N), * control(N)), where the first control token represents the false branch, and * the second control token represents the true branch. 
Another example is the - * fork node, which produces prod(control(N*k), u64), where the u64 is the + * fork node, which produces prod(control(N, K), u64), where the u64 is the * thread ID. Functions are devoid of side effects, so call nodes don't take as * input or output control tokens. There is also no global memory - use arrays. */ diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 29742537..f1a99d22 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -1,7 +1,9 @@ +pub mod def_use; pub mod dot; pub mod ir; pub mod parse; +pub use crate::def_use::*; pub use crate::dot::*; pub use crate::ir::*; pub use crate::parse::*; diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 7d3d2e55..e9638c0b 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -116,6 +116,7 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a param_types: vec![], return_type: TypeID::new(0), nodes: vec![], + node_types: None, num_dynamic_constants: 0 }; context.function_ids.len() @@ -250,6 +251,7 @@ fn parse_function<'a>( param_types: params.into_iter().map(|x| x.5).collect(), return_type, nodes: fixed_nodes, + node_types: None, num_dynamic_constants, }, )) -- GitLab From 0fbcddbc72fdc765b719e87a7bc770ec4f54ad89 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 15 Sep 2023 23:15:36 -0500 Subject: [PATCH 021/105] Fill in def_use function --- hercules_ir/src/def_use.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index bba8b7bd..89355e86 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -34,7 +34,17 @@ impl ImmutableDefUseMap { * Top level def_use function. */ pub fn def_use(function: &Function) -> ImmutableDefUseMap { - todo!() + // Step 1: get uses for each node. 
+ let node_uses: Vec<NodeUses> = function.nodes.iter().map(|node| get_uses(node)).collect(); + + // Step 2: assemble first_edges and uses vectors simultaneously. + let mut first_edges: Vec<u32> = vec![]; + let mut uses: Vec<NodeID> = vec![]; + for u in node_uses { + first_edges.push(uses.len() as u32); + uses.extend_from_slice(u.as_ref()); + } + ImmutableDefUseMap { first_edges, uses } } /* -- GitLab From 2301fa9af081a4497755f435fd67d8cfd0b3f953 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 09:39:46 -0500 Subject: [PATCH 022/105] get_use -> get_uses --- hercules_ir/src/def_use.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 89355e86..fa789a86 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -19,14 +19,10 @@ impl ImmutableDefUseMap { } } - pub fn get_use(&self, id: NodeID, n: u32) -> NodeID { - assert!( - n < self.num_edges(id), - "PANIC: Attempted to get use edge #{} from node with only {} use edges.", - n + 1, - self.num_edges(id) - ); - self.uses[(self.first_edges[id.idx()] + n) as usize] + pub fn get_uses(&self, id: NodeID) -> &[NodeID] { + let first_edge = self.first_edges[id.idx()] as usize; + let num_edges = self.num_edges(id) as usize; + &self.uses[first_edge..first_edge + num_edges] } } @@ -58,6 +54,9 @@ pub enum NodeUses<'a> { Two([NodeID; 2]), Three([NodeID; 3]), Variable(&'a Box<[NodeID]>), + // Phi nodes are special, and store both a NodeID locally *and* many in a + // boxed slice. Since these NodeIDs are not stored contiguously, we have to + // construct a new contiguous slice by copying. Sigh. 
Phi(Box<[NodeID]>), } -- GitLab From 59cb5783daffe46a9dcf37a136b0bd6b43708448 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 10:14:07 -0500 Subject: [PATCH 023/105] Fix def_use function --- hercules_ir/src/def_use.rs | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index fa789a86..44b34a23 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -7,7 +7,7 @@ use crate::*; #[derive(Debug, Clone)] pub struct ImmutableDefUseMap { first_edges: Vec<u32>, - uses: Vec<NodeID>, + users: Vec<NodeID>, } impl ImmutableDefUseMap { @@ -22,7 +22,7 @@ impl ImmutableDefUseMap { pub fn get_uses(&self, id: NodeID) -> &[NodeID] { let first_edge = self.first_edges[id.idx()] as usize; let num_edges = self.num_edges(id) as usize; - &self.uses[first_edge..first_edge + num_edges] + &self.users[first_edge..first_edge + num_edges] } } @@ -33,14 +33,36 @@ pub fn def_use(function: &Function) -> ImmutableDefUseMap { // Step 1: get uses for each node. let node_uses: Vec<NodeUses> = function.nodes.iter().map(|node| get_uses(node)).collect(); - // Step 2: assemble first_edges and uses vectors simultaneously. + // Step 2: count number of users per node. + let mut num_users: Vec<u32> = vec![0; node_uses.len()]; + for uses in node_uses.iter() { + for u in uses.as_ref() { + num_users[u.idx()] += 1; + } + } + + // Step 3: assemble first_edges vector. let mut first_edges: Vec<u32> = vec![]; - let mut uses: Vec<NodeID> = vec![]; - for u in node_uses { - first_edges.push(uses.len() as u32); - uses.extend_from_slice(u.as_ref()); + let mut num_edges = 0; + for num_users in num_users { + first_edges.push(num_edges); + num_edges += num_users; } - ImmutableDefUseMap { first_edges, uses } + + // Step 4: assemble users vector. 
+ let mut users: Vec<NodeID> = vec![NodeID::new(0); num_edges as usize]; + let mut num_users_per_node: Vec<u32> = vec![0; node_uses.len()]; + for (idx, uses) in node_uses.iter().enumerate() { + for u in uses.as_ref() { + let first_edge = first_edges[u.idx()]; + let num_users_so_far = num_users_per_node[u.idx()]; + users[first_edge as usize + num_users_so_far as usize] = NodeID::new(idx); + num_users_per_node[u.idx()] = num_users_so_far + 1; + } + } + + // Step 5: pack and return. + ImmutableDefUseMap { first_edges, users } } /* -- GitLab From 805460368beaee4ec054828d2c4f52eb63d228e9 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 17:48:25 -0500 Subject: [PATCH 024/105] Skeleton dataflow code --- hercules_ir/src/dataflow.rs | 35 +++++++++++++++++++++++++++++++++++ hercules_ir/src/ir.rs | 1 - hercules_ir/src/lib.rs | 2 ++ hercules_ir/src/parse.rs | 2 -- 4 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 hercules_ir/src/dataflow.rs diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs new file mode 100644 index 00000000..996b9547 --- /dev/null +++ b/hercules_ir/src/dataflow.rs @@ -0,0 +1,35 @@ +use crate::*; + +/* + * Trait for a type that is a semilattice. Semilattice types must also be Eq, + * so that the dataflow analysis can determine when to terminate. + */ +pub trait Semilattice: Eq { + fn meet(a: &Self, b: &Self) -> Self; + fn bottom() -> Self; + fn top() -> Self; +} + +/* + * Top level dataflow function. + */ +pub fn dataflow<L, F, D>(function: Function, flow_function: F, auxiliary_data: &D) -> Vec<L> +where + L: Semilattice, + F: Fn(L, &D, NodeID) -> L, +{ + // Step 1: create initial set of "in" points. The start node is initialized + // to bottom, and everything else is initialized to top. + let points: Vec<L> = (0..function.nodes.len()) + .map(|id| if id == 0 { L::bottom() } else { L::top() }) + .collect(); + + todo!() +} + +/* + * Compute reverse post order of nodes in function. 
+ */ +pub fn reverse_postorder(function: Function) -> Vec<NodeID> { + todo!() +} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 1734548e..3f73d6dc 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -28,7 +28,6 @@ pub struct Function { pub param_types: Vec<TypeID>, pub return_type: TypeID, pub nodes: Vec<Node>, - pub node_types: Option<Vec<TypeID>>, pub num_dynamic_constants: u32, } diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index f1a99d22..685618ce 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -1,8 +1,10 @@ +pub mod dataflow; pub mod def_use; pub mod dot; pub mod ir; pub mod parse; +pub use crate::dataflow::*; pub use crate::def_use::*; pub use crate::dot::*; pub use crate::ir::*; diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index e9638c0b..7d3d2e55 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -116,7 +116,6 @@ fn parse_module<'a>(ir_text: &'a str, context: Context<'a>) -> nom::IResult<&'a param_types: vec![], return_type: TypeID::new(0), nodes: vec![], - node_types: None, num_dynamic_constants: 0 }; context.function_ids.len() @@ -251,7 +250,6 @@ fn parse_function<'a>( param_types: params.into_iter().map(|x| x.5).collect(), return_type, nodes: fixed_nodes, - node_types: None, num_dynamic_constants, }, )) -- GitLab From 2a306ccd1565731b480d34bd457c1c12301be4f9 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 18:09:54 -0500 Subject: [PATCH 025/105] Reverse post order skeleton, bring in bitvec dependency --- Cargo.lock | 40 +++++++++++++++++++++++++++++++++++++ hercules_ir/Cargo.toml | 3 ++- hercules_ir/src/dataflow.rs | 21 +++++++++++++++++-- hercules_ir/src/def_use.rs | 4 ++++ 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bfb5320c..13eff2b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,6 +56,18 @@ version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "clap" version = "4.4.2" @@ -102,6 +114,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "heck" version = "0.4.1" @@ -112,6 +130,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" name = "hercules_ir" version = "0.1.0" dependencies = [ + "bitvec", "nom", "ordered-float", ] @@ -182,6 +201,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "strsim" version = "0.10.0" @@ -199,6 +224,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "unicode-ident" version = "1.0.11" @@ -276,3 +307,12 @@ name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] diff --git a/hercules_ir/Cargo.toml b/hercules_ir/Cargo.toml index c7056cc4..aab636b2 100644 --- a/hercules_ir/Cargo.toml +++ b/hercules_ir/Cargo.toml @@ -5,4 +5,5 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>"] [dependencies] nom = "*" -ordered-float = "*" \ No newline at end of file +ordered-float = "*" +bitvec = "*" \ No newline at end of file diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 996b9547..5919726a 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -1,3 +1,7 @@ +extern crate bitvec; + +use dataflow::bitvec::prelude::*; + use crate::*; /* @@ -13,7 +17,7 @@ pub trait Semilattice: Eq { /* * Top level dataflow function. */ -pub fn dataflow<L, F, D>(function: Function, flow_function: F, auxiliary_data: &D) -> Vec<L> +pub fn dataflow<L, F, D>(function: &Function, flow_function: F, auxiliary_data: &D) -> Vec<L> where L: Semilattice, F: Fn(L, &D, NodeID) -> L, @@ -30,6 +34,19 @@ where /* * Compute reverse post order of nodes in function. 
*/ -pub fn reverse_postorder(function: Function) -> Vec<NodeID> { +pub fn reverse_postorder(def_uses: &ImmutableDefUseMap) -> Vec<NodeID> { + let order = vec![]; + let visited = bitvec![u8, Lsb0; 0; def_uses.num_nodes()]; + let (mut order, _) = reverse_postorder_helper(NodeID::new(0), def_uses, order, visited); + order.reverse(); + order +} + +fn reverse_postorder_helper( + node: NodeID, + def_uses: &ImmutableDefUseMap, + mut order: Vec<NodeID>, + mut visited: BitVec<u8, Lsb0>, +) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { todo!() } diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 44b34a23..b4888fc6 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -24,6 +24,10 @@ impl ImmutableDefUseMap { let num_edges = self.num_edges(id) as usize; &self.users[first_edge..first_edge + num_edges] } + + pub fn num_nodes(&self) -> usize { + self.first_edges.len() + } } /* -- GitLab From a5c2851312488c88800195db087230fbeb350718 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 18:13:46 -0500 Subject: [PATCH 026/105] Reverse post order --- hercules_ir/src/dataflow.rs | 11 ++++++++++- hercules_ir/src/def_use.rs | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 5919726a..9870589c 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -48,5 +48,14 @@ fn reverse_postorder_helper( mut order: Vec<NodeID>, mut visited: BitVec<u8, Lsb0>, ) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { - todo!() + if visited[node.idx()] { + (order, visited) + } else { + visited.set(node.idx(), true); + for user in def_uses.get_users(node) { + (order, visited) = reverse_postorder_helper(*user, def_uses, order, visited); + } + order.push(node); + (order, visited) + } } diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index b4888fc6..5c808038 100644 --- a/hercules_ir/src/def_use.rs +++ 
b/hercules_ir/src/def_use.rs @@ -19,7 +19,7 @@ impl ImmutableDefUseMap { } } - pub fn get_uses(&self, id: NodeID) -> &[NodeID] { + pub fn get_users(&self, id: NodeID) -> &[NodeID] { let first_edge = self.first_edges[id.idx()] as usize; let num_edges = self.num_edges(id) as usize; &self.users[first_edge..first_edge + num_edges] -- GitLab From f47ac48fcaf870b811027355f6c499deb1f79cc8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 18:35:18 -0500 Subject: [PATCH 027/105] Generic dataflow function --- hercules_ir/src/dataflow.rs | 65 ++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 9870589c..c5c7d1ce 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -17,27 +17,76 @@ pub trait Semilattice: Eq { /* * Top level dataflow function. */ -pub fn dataflow<L, F, D>(function: &Function, flow_function: F, auxiliary_data: &D) -> Vec<L> +pub fn dataflow<L, F, D>( + function: &Function, + reverse_post_order: &Vec<NodeID>, + flow_function: F, + auxiliary_data: &D, +) -> (Vec<L>, Vec<L>) where L: Semilattice, - F: Fn(L, &D, NodeID) -> L, + F: Fn(&L, &D, NodeID) -> L, { // Step 1: create initial set of "in" points. The start node is initialized // to bottom, and everything else is initialized to top. - let points: Vec<L> = (0..function.nodes.len()) + let mut ins: Vec<L> = (0..function.nodes.len()) .map(|id| if id == 0 { L::bottom() } else { L::top() }) .collect(); - todo!() + // Step 2: create initial set of "out" points. + let mut outs: Vec<L> = ins + .iter() + .enumerate() + .map(|(id, l)| flow_function(l, auxiliary_data, NodeID::new(id))) + .collect(); + + // Step 3: compute NodeUses for each node in function. + let uses: Vec<NodeUses> = function.nodes.iter().map(|n| get_uses(n)).collect(); + + // Step 4: peform main dataflow loop. 
+ loop { + let mut change = false; + + // Iterate nodes in reverse post order. + for node in reverse_post_order { + // Compute meet of "out" lattice values. + let mut meet = L::top(); + for u in uses[node.idx()].as_ref() { + meet = L::meet(&meet, &outs[u.idx()]); + } + let new_out = flow_function(&meet, auxiliary_data, *node); + if outs[node.idx()] != new_out { + change = true; + } + ins[node.idx()] = meet; + outs[node.idx()] = new_out; + } + + // If no lattice value changed, we've reached the maximum fixed point + // solution, and can terminate. + if !change { + break; + } + } + + // Return both in "in" and "out" sets. + (ins, outs) } /* * Compute reverse post order of nodes in function. */ pub fn reverse_postorder(def_uses: &ImmutableDefUseMap) -> Vec<NodeID> { - let order = vec![]; + // Initialize order vector and bitset for tracking which nodes have been + // visited. + let order = Vec::with_capacity(def_uses.num_nodes()); let visited = bitvec![u8, Lsb0; 0; def_uses.num_nodes()]; + + // Order and visited are threaded through arguments / return pair of + // reverse_postorder_helper for ownership reasons. let (mut order, _) = reverse_postorder_helper(NodeID::new(0), def_uses, order, visited); + + // Reverse order in-place. order.reverse(); order } @@ -49,12 +98,18 @@ fn reverse_postorder_helper( mut visited: BitVec<u8, Lsb0>, ) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { if visited[node.idx()] { + // If already visited, return early. (order, visited) } else { + // Set visited to true. visited.set(node.idx(), true); + + // Iterate over users. for user in def_uses.get_users(node) { (order, visited) = reverse_postorder_helper(*user, def_uses, order, visited); } + + // After iterating users, push this node. 
order.push(node); (order, visited) } -- GitLab From 42e9d840d1c14d351774bf8f869fcaea29ac6842 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 16 Sep 2023 20:59:50 -0500 Subject: [PATCH 028/105] Update comments --- hercules_ir/src/dataflow.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index c5c7d1ce..8e4216e9 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -54,10 +54,14 @@ where for u in uses[node.idx()].as_ref() { meet = L::meet(&meet, &outs[u.idx()]); } + + // Compute new "out" value from new "in" value. let new_out = flow_function(&meet, auxiliary_data, *node); if outs[node.idx()] != new_out { change = true; } + + // Update ins and outs vectors. ins[node.idx()] = meet; outs[node.idx()] = new_out; } @@ -69,7 +73,7 @@ where } } - // Return both in "in" and "out" sets. + // Step 5: return both "in" and "out" sets. (ins, outs) } -- GitLab From 3730a6c1da7bf5f50bb48dab504e6f9719d2f410 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 17 Sep 2023 15:53:16 -0500 Subject: [PATCH 029/105] Make dataflow function more generic, typecheck skeleton --- hercules_ir/src/dataflow.rs | 37 ++++++++++-------- hercules_ir/src/lib.rs | 2 + hercules_ir/src/typecheck.rs | 72 ++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 hercules_ir/src/typecheck.rs diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 8e4216e9..2e0c28d7 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -15,29 +15,33 @@ pub trait Semilattice: Eq { } /* - * Top level dataflow function. + * Top level dataflow function. This routine is slightly more generic than the + * typical textbook definition. The flow function takes an ordered slice of + * predecessor lattice values, rather than an unordered set. 
Thus, the flow + * function can perform non-associative operations on the "in" lattice values. + * This makes this routine useful for some analyses, such as typechecking. */ pub fn dataflow<L, F, D>( function: &Function, reverse_post_order: &Vec<NodeID>, flow_function: F, auxiliary_data: &D, -) -> (Vec<L>, Vec<L>) +) -> Vec<L> where L: Semilattice, - F: Fn(&L, &D, NodeID) -> L, + F: Fn(&[&L], &D, NodeID) -> L, { // Step 1: create initial set of "in" points. The start node is initialized // to bottom, and everything else is initialized to top. - let mut ins: Vec<L> = (0..function.nodes.len()) + let ins: Vec<L> = (0..function.nodes.len()) .map(|id| if id == 0 { L::bottom() } else { L::top() }) .collect(); // Step 2: create initial set of "out" points. let mut outs: Vec<L> = ins - .iter() + .into_iter() .enumerate() - .map(|(id, l)| flow_function(l, auxiliary_data, NodeID::new(id))) + .map(|(id, l)| flow_function(&[&l], auxiliary_data, NodeID::new(id))) .collect(); // Step 3: compute NodeUses for each node in function. @@ -49,20 +53,23 @@ where // Iterate nodes in reverse post order. for node in reverse_post_order { - // Compute meet of "out" lattice values. - let mut meet = L::top(); + // Assemble the "out" values of the predecessors of this node. This + // vector's definition is hopefully LICMed out, so that we don't do + // an allocation per node. This can't be done manually because of + // Rust's ownership rules (in particular, pred_outs holds a + // reference to a value inside outs, which is mutated below). + let mut pred_outs = vec![]; for u in uses[node.idx()].as_ref() { - meet = L::meet(&meet, &outs[u.idx()]); + pred_outs.push(&outs[u.idx()]); } - // Compute new "out" value from new "in" value. - let new_out = flow_function(&meet, auxiliary_data, *node); + // Compute new "out" value from predecessor "out" values. 
+ let new_out = flow_function(&pred_outs[..], auxiliary_data, *node); if outs[node.idx()] != new_out { change = true; } - // Update ins and outs vectors. - ins[node.idx()] = meet; + // Update outs vector. outs[node.idx()] = new_out; } @@ -73,8 +80,8 @@ where } } - // Step 5: return both "in" and "out" sets. - (ins, outs) + // Step 5: return "out" set. + outs } /* diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 685618ce..df72712f 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -3,9 +3,11 @@ pub mod def_use; pub mod dot; pub mod ir; pub mod parse; +pub mod typecheck; pub use crate::dataflow::*; pub use crate::def_use::*; pub use crate::dot::*; pub use crate::ir::*; pub use crate::parse::*; +pub use crate::typecheck::*; diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs new file mode 100644 index 00000000..2fb7c280 --- /dev/null +++ b/hercules_ir/src/typecheck.rs @@ -0,0 +1,72 @@ +use crate::*; + +/* + * Enum for type semilattice. + */ +#[derive(Eq, Clone)] +enum TypeSemilattice { + Unconstrained, + Concrete(TypeID), + Error(String), +} + +use self::TypeSemilattice::*; + +impl PartialEq for TypeSemilattice { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Unconstrained, Unconstrained) => true, + (Concrete(id1), Concrete(id2)) => id1 == id2, + (Error(_), Error(_)) => true, + _ => false, + } + } +} + +impl Semilattice for TypeSemilattice { + fn meet(a: &Self, b: &Self) -> Self { + match (a, b) { + (Unconstrained, Unconstrained) => Unconstrained, + (Unconstrained, b) => b.clone(), + (a, Unconstrained) => a.clone(), + (Concrete(id1), Concrete(id2)) => { + if id1 == id2 { + Concrete(*id1) + } else { + // Error will only allocate when a type error has occurred. + // In that case, we're less concerned about speed to the + // compiler, and more allocations are acceptable. 
+ Error(format!( + "Couldn't reconcile two different concrete types, with IDs {} and {}.", + id1.idx(), + id2.idx() + )) + } + } + (Error(msg), _) => Error(msg.clone()), + (_, Error(msg)) => Error(msg.clone()), + } + } + + fn bottom() -> Self { + // Strings don't allocate unless they actually contain characters, so + // this is cheap. + Error(String::new()) + } + + fn top() -> Self { + Unconstrained + } +} + +/* + * Top level typecheck function. + */ +pub fn typecheck( + function: &Function, + types: &Vec<Type>, + constants: &Vec<Constant>, + reverse_post_order: &Vec<NodeID>, +) -> Result<Vec<Type>, String> { + todo!() +} -- GitLab From 7f9ebcd719aabb4dd6287b591c8ddf583f41641e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 17 Sep 2023 16:33:06 -0500 Subject: [PATCH 030/105] Start typechecking --- hercules_ir/src/dataflow.rs | 10 ++-- hercules_ir/src/typecheck.rs | 88 +++++++++++++++++++++++++++++++++--- 2 files changed, 88 insertions(+), 10 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 2e0c28d7..9c34f635 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -17,19 +17,21 @@ pub trait Semilattice: Eq { /* * Top level dataflow function. This routine is slightly more generic than the * typical textbook definition. The flow function takes an ordered slice of - * predecessor lattice values, rather than an unordered set. Thus, the flow + * predecessor lattice values, rather a single lattice value. Thus, the flow * function can perform non-associative operations on the "in" lattice values. - * This makes this routine useful for some analyses, such as typechecking. + * This makes this routine useful for some analyses, such as typechecking. To + * perform the typical behavior, the flow function should start by meeting the + * input lattice values into a single lattice value. 
*/ pub fn dataflow<L, F, D>( function: &Function, reverse_post_order: &Vec<NodeID>, flow_function: F, - auxiliary_data: &D, + auxiliary_data: &mut D, ) -> Vec<L> where L: Semilattice, - F: Fn(&[&L], &D, NodeID) -> L, + F: Fn(&[&L], &mut D, NodeID) -> L, { // Step 1: create initial set of "in" points. The start node is initialized // to bottom, and everything else is initialized to top. diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 2fb7c280..6dee9b6d 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -1,5 +1,11 @@ use crate::*; +use std::collections::HashMap; + +use Node::*; + +use self::TypeSemilattice::*; + /* * Enum for type semilattice. */ @@ -10,8 +16,6 @@ enum TypeSemilattice { Error(String), } -use self::TypeSemilattice::*; - impl PartialEq for TypeSemilattice { fn eq(&self, other: &Self) -> bool { match (self, other) { @@ -64,9 +68,81 @@ impl Semilattice for TypeSemilattice { */ pub fn typecheck( function: &Function, - types: &Vec<Type>, - constants: &Vec<Constant>, + types: &mut Vec<Type>, + constants: &Vec<ir::Constant>, reverse_post_order: &Vec<NodeID>, -) -> Result<Vec<Type>, String> { - todo!() +) -> Result<Vec<TypeID>, String> { + // Step 1: assemble a reverse type map. This is needed to get or create the + // ID of potentially new types. + let mut reverse_type_map: HashMap<Type, TypeID> = types + .iter() + .enumerate() + .map(|(idx, ty)| (ty.clone(), TypeID::new(idx))) + .collect(); + + // Step 2: run dataflow. This is an occurrence of dataflow where the flow + // function performs a non-associative operation on the predecessor "out" + // values. + let result = dataflow( + function, + reverse_post_order, + typeflow, + &mut (function, types, constants, &mut reverse_type_map), + ); + + // Step 3: convert the individual type lattice values into a list of + // concrete type values, or a single error. 
+ result + .into_iter() + .map(|x| match x { + Unconstrained => Err(String::from("Found unconstrained type in program.")), + Concrete(id) => Ok(id), + Error(msg) => Err(msg), + }) + .collect() +} + +/* + * Flow function for typechecking. + */ +fn typeflow( + inputs: &[&TypeSemilattice], + auxiliary: &mut ( + &Function, + &mut Vec<Type>, + &Vec<ir::Constant>, + &mut HashMap<Type, TypeID>, + ), + id: NodeID, +) -> TypeSemilattice { + let (function, types, constant, reverse_type_map) = auxiliary; + + // Whenever we want to reference a specific type (for example, for the + // start node), we need to get its type ID. This helper function gets the + // ID if it already exists. If the type doesn't already exist, the helper + // adds it to the type intern list. + let mut get_type_id = |ty: Type| -> TypeID { + if let Some(id) = reverse_type_map.get(&ty) { + *id + } else { + let id = TypeID::new(reverse_type_map.len()); + reverse_type_map.insert(ty.clone(), id); + types.push(ty); + id + } + }; + + // Each node requires different type logic. This unfortunately results in a + // large match statement. Oh well. Each arm returns the lattice value for + // the "out" type of the node. + match function.nodes[id.idx()] { + Start => { + if inputs.len() != 0 { + Error(String::from("Start node must have zero inputs.")) + } else { + Concrete(get_type_id(Type::Control(Box::new([])))) + } + } + _ => todo!(), + } } -- GitLab From 3ab6b3777ed328cc51853b7f0a9902c091590581 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 17 Sep 2023 16:59:45 -0500 Subject: [PATCH 031/105] Region node type check --- hercules_ir/src/typecheck.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 6dee9b6d..299fe295 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -135,7 +135,7 @@ fn typeflow( // Each node requires different type logic. 
This unfortunately results in a // large match statement. Oh well. Each arm returns the lattice value for // the "out" type of the node. - match function.nodes[id.idx()] { + match &function.nodes[id.idx()] { Start => { if inputs.len() != 0 { Error(String::from("Start node must have zero inputs.")) @@ -143,6 +143,29 @@ fn typeflow( Concrete(get_type_id(Type::Control(Box::new([])))) } } + Region { preds: _ } => { + if inputs.len() == 0 { + Error(String::from( + "Region node must have at least one predecessor.", + )) + } else { + let mut meet = inputs[0].clone(); + for l in inputs[1..].iter() { + meet = TypeSemilattice::meet(&meet, l); + } + if let Concrete(id) = meet { + if let Type::Control(_) = types[id.idx()] { + meet + } else { + Error(String::from( + "Region node's input type cannot be non-control.", + )) + } + } else { + meet + } + } + } _ => todo!(), } } -- GitLab From d8085193dbf8eaf9004924ae83488bf521687785 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 17 Sep 2023 17:31:22 -0500 Subject: [PATCH 032/105] Boolean type / constants, if node typecheck --- hercules_ir/src/ir.rs | 16 ++++++ hercules_ir/src/parse.rs | 10 ++++ hercules_ir/src/typecheck.rs | 99 +++++++++++++++++++++++++----------- 3 files changed, 96 insertions(+), 29 deletions(-) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 3f73d6dc..1c865b9b 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -47,6 +47,7 @@ pub struct Function { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { Control(Box<[DynamicConstantID]>), + Boolean, Integer8, Integer16, Integer32, @@ -62,6 +63,20 @@ pub enum Type { Array(TypeID, DynamicConstantID), } +impl Type { + pub fn is_control(&self) -> bool { + if let Type::Control(_) = self { + true + } else { + false + } + } + + pub fn is_bool(&self) -> bool { + self == &Type::Boolean + } +} + /* * Constants are pretty standard in Hercules IR. 
Float constants used the * ordered_float crate so that constants can be keys in maps (used for @@ -72,6 +87,7 @@ pub enum Type { */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Constant { + Boolean(bool), Integer8(i8), Integer16(i16), Integer32(i32), diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 7d3d2e55..d86038b0 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -616,6 +616,7 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes Type::Control(Box::new([])) }), // Primitive types are written in Rust style. + nom::combinator::map(nom::bytes::complete::tag("bool"), |_| Type::Boolean), nom::combinator::map(nom::bytes::complete::tag("i8"), |_| Type::Integer8), nom::combinator::map(nom::bytes::complete::tag("i16"), |_| Type::Integer16), nom::combinator::map(nom::bytes::complete::tag("i32"), |_| Type::Integer32), @@ -755,6 +756,7 @@ fn parse_constant<'a>( input: ir_text, code: nom::error::ErrorKind::IsNot, }))?, + Type::Boolean => parse_boolean(ir_text)?, Type::Integer8 => parse_integer8(ir_text)?, Type::Integer16 => parse_integer16(ir_text)?, Type::Integer32 => parse_integer32(ir_text)?, @@ -802,6 +804,14 @@ fn parse_prim<'a, T: FromStr>(ir_text: &'a str, chars: &'static str) -> nom::IRe Ok((ir_text, x)) } +fn parse_boolean<'a>(ir_text: &'a str) -> nom::IResult<&'a str, Constant> { + let (ir_text, val) = nom::branch::alt(( + nom::combinator::map(nom::bytes::complete::tag("false"), |_| false), + nom::combinator::map(nom::bytes::complete::tag("true"), |_| true), + ))(ir_text)?; + Ok((ir_text, Constant::Boolean(val))) +} + fn parse_integer8<'a>(ir_text: &'a str) -> nom::IResult<&'a str, Constant> { let (ir_text, num) = parse_prim(ir_text, "-1234567890")?; Ok((ir_text, Constant::Integer8(num))) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 299fe295..828a8b69 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -121,16 +121,17 @@ 
fn typeflow( // start node), we need to get its type ID. This helper function gets the // ID if it already exists. If the type doesn't already exist, the helper // adds it to the type intern list. - let mut get_type_id = |ty: Type| -> TypeID { - if let Some(id) = reverse_type_map.get(&ty) { - *id - } else { - let id = TypeID::new(reverse_type_map.len()); - reverse_type_map.insert(ty.clone(), id); - types.push(ty); - id - } - }; + let get_type_id = + |ty: Type, types: &mut Vec<Type>, reverse_type_map: &mut HashMap<Type, TypeID>| -> TypeID { + if let Some(id) = reverse_type_map.get(&ty) { + *id + } else { + let id = TypeID::new(reverse_type_map.len()); + reverse_type_map.insert(ty.clone(), id); + types.push(ty); + id + } + }; // Each node requires different type logic. This unfortunately results in a // large match statement. Oh well. Each arm returns the lattice value for @@ -138,33 +139,73 @@ fn typeflow( match &function.nodes[id.idx()] { Start => { if inputs.len() != 0 { - Error(String::from("Start node must have zero inputs.")) - } else { - Concrete(get_type_id(Type::Control(Box::new([])))) + return Error(String::from("Start node must have zero inputs.")); } + + // The start node is the producer of the control token. + Concrete(get_type_id( + Type::Control(Box::new([])), + types, + reverse_type_map, + )) } Region { preds: _ } => { if inputs.len() == 0 { - Error(String::from( - "Region node must have at least one predecessor.", - )) - } else { - let mut meet = inputs[0].clone(); - for l in inputs[1..].iter() { - meet = TypeSemilattice::meet(&meet, l); + return Error(String::from("Region node must have at least one input.")); + } + + let mut meet = inputs[0].clone(); + for l in inputs[1..].iter() { + meet = TypeSemilattice::meet(&meet, l); + } + + // Only special case is if concrete type is non-control. In + // this case, we override that concrete type with an error, + // since the input types must all be control types. 
Any other + // lattice value can be returned as-is. + if let Concrete(id) = meet { + if !types[id.idx()].is_control() { + return Error(String::from( + "Region node's input type cannot be non-control.", + )); } - if let Concrete(id) = meet { - if let Type::Control(_) = types[id.idx()] { - meet - } else { - Error(String::from( - "Region node's input type cannot be non-control.", - )) - } + } + + meet + } + If { + control: _, + cond: _, + } => { + if inputs.len() != 2 { + return Error(String::from("If node must have exactly two inputs.")); + } + + // Check type of data input first, since we may return while + // checking control input. + if let Concrete(id) = inputs[1] { + if !types[id.idx()].is_bool() { + return Error(String::from( + "If node's condition input cannot have non-boolean type.", + )); + } + } + + if let Concrete(id) = inputs[0] { + if !types[id.idx()].is_control() { + return Error(String::from( + "If node's control input cannot have non-control type.", + )); } else { - meet + // At this point, data input is already "good" (not + // necessarily non-error, but at this point the lattice + // output doesn't need to be an error). 
+ let out_ty = Type::Product(Box::new([*id, *id])); + return Concrete(get_type_id(out_ty, types, reverse_type_map)); } } + + inputs[0].clone() } _ => todo!(), } -- GitLab From e52ea8aa0b5f49756cb51f00e7a1a251a4573ee6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 17 Sep 2023 17:38:31 -0500 Subject: [PATCH 033/105] Fix if node --- hercules_ir/src/typecheck.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 828a8b69..3be7bafc 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -16,6 +16,28 @@ enum TypeSemilattice { Error(String), } +impl TypeSemilattice { + fn is_unconstrained(&self) -> bool { + self == &Unconstrained + } + + fn is_concrete(&self) -> bool { + if let Concrete(_) = self { + true + } else { + false + } + } + + fn is_error(&self) -> bool { + if let Error(_) = self { + true + } else { + false + } + } +} + impl PartialEq for TypeSemilattice { fn eq(&self, other: &Self) -> bool { match (self, other) { @@ -189,6 +211,10 @@ fn typeflow( "If node's condition input cannot have non-boolean type.", )); } + } else if inputs[1].is_error() { + // If an input has an error lattice value, it must be + // propagated. + return inputs[1].clone(); } if let Concrete(id) = inputs[0] { @@ -197,9 +223,6 @@ fn typeflow( "If node's control input cannot have non-control type.", )); } else { - // At this point, data input is already "good" (not - // necessarily non-error, but at this point the lattice - // output doesn't need to be an error). 
let out_ty = Type::Product(Box::new([*id, *id])); return Concrete(get_type_id(out_ty, types, reverse_type_map)); } -- GitLab From e2315525037443101398705a871dd136f1ffbfc5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 12:51:29 -0500 Subject: [PATCH 034/105] Fork typecheck --- hercules_ir/src/typecheck.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 3be7bafc..02bd2e07 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -230,6 +230,39 @@ fn typeflow( inputs[0].clone() } + Fork { control: _, factor } => { + if inputs.len() != 1 { + return Error(String::from("Fork node must have exactly one input.")); + } + + if let Concrete(id) = inputs[0] { + if let Type::Control(factors) = &types[id.idx()] { + // Fork adds a new factor to the thread spawn factor list. + let mut new_factors = factors.clone().into_vec(); + new_factors.push(*factor); + + // Out type is a pair - first element is the control type, + // second is the index type (u64). Each thread gets a + // different thread ID at runtime. 
+ let control_out_id = get_type_id( + Type::Control(new_factors.into_boxed_slice()), + types, + reverse_type_map, + ); + let index_out_id = + get_type_id(Type::UnsignedInteger64, types, reverse_type_map); + let out_ty = Type::Product(Box::new([control_out_id, index_out_id])); + return Concrete(get_type_id(out_ty, types, reverse_type_map)); + } else { + return Error(String::from( + "Fork node's input cannot have non-control type.", + )); + } + } + + inputs[0].clone() + } + _ => todo!(), } } -- GitLab From 4bfd514cb55644696e63f1c1af7a3752b15853d6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 13:04:47 -0500 Subject: [PATCH 035/105] Join typecheck --- hercules_ir/src/typecheck.rs | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 02bd2e07..bd823958 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -262,7 +262,60 @@ fn typeflow( inputs[0].clone() } + Join { + control: _, + data: _, + } => { + if inputs.len() != 2 { + return Error(String::from("Join node must have exactly two inputs.")); + } + + // If the data input isn't concrete, we can't assemble a concrete + // output type yet, so just return data input's type (either + // unconstrained or error) instead. + if let Concrete(data_id) = inputs[1] { + if types[data_id.idx()].is_control() { + return Error(String::from( + "Join node's second input must not have a control type.", + )); + } + + // Similarly, if the control input isn't concrete yet, we can't + // assemble a concrete output type, so just return the control + // input non-concrete type. + if let Concrete(control_id) = inputs[0] { + if let Type::Control(factors) = &types[control_id.idx()] { + // Join removes a factor from the factor list. 
+ if factors.len() == 0 { + return Error(String::from("Join node's first input must have a control type with at least one thread replication factor.")); + } + let mut new_factors = factors.clone().into_vec(); + let dc_id = new_factors.pop().unwrap(); + // Out type is a pair - first element is the control + // type, second is the result array from the parallel + // computation. + let control_out_id = get_type_id( + Type::Control(new_factors.into_boxed_slice()), + types, + reverse_type_map, + ); + let array_out_id = + get_type_id(Type::Array(*data_id, dc_id), types, reverse_type_map); + let out_ty = Type::Product(Box::new([control_out_id, array_out_id])); + return Concrete(get_type_id(out_ty, types, reverse_type_map)); + } else { + return Error(String::from( + "Join node's first input cannot have non-control type.", + )); + } + } else { + return inputs[0].clone(); + } + } + + inputs[1].clone() + } _ => todo!(), } } -- GitLab From a5f39a9189095d83e4a81b4b004131c6a5208c3a Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 13:17:06 -0500 Subject: [PATCH 036/105] Phi typecheck --- hercules_ir/src/typecheck.rs | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index bd823958..9137338c 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -316,6 +316,43 @@ fn typeflow( inputs[1].clone() } + Phi { + control: _, + data: _, + } => { + if inputs.len() < 2 { + return Error(String::from("Phi node must have at least two inputs.")); + } + + // Check type of control input first, since this may produce an + // error. + if let Concrete(id) = inputs[inputs.len() - 1] { + if !types[id.idx()].is_control() { + return Error(String::from( + "Phi node's control input cannot have non-control type.", + )); + } + } else if inputs[1].is_error() { + // If an input has an error lattice value, it must be + // propagated. 
+ return inputs[1].clone(); + } + + // Output type of phi node is same type as every data input. + let mut meet = inputs[0].clone(); + for l in inputs[1..inputs.len() - 1].iter() { + if let Concrete(id) = l { + if types[id.idx()].is_control() { + return Error(String::from( + "Phi node's data inputs cannot have control type.", + )); + } + } + meet = TypeSemilattice::meet(&meet, l); + } + + meet + } _ => todo!(), } } -- GitLab From 7834224472f14848329613a9f6dbd0040d7bcd6c Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 13:18:09 -0500 Subject: [PATCH 037/105] Fix phi typecheck --- hercules_ir/src/typecheck.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 9137338c..007cf2f0 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -332,10 +332,10 @@ fn typeflow( "Phi node's control input cannot have non-control type.", )); } - } else if inputs[1].is_error() { + } else if inputs[inputs.len() - 1].is_error() { // If an input has an error lattice value, it must be // propagated. - return inputs[1].clone(); + return inputs[inputs.len() - 1].clone(); } // Output type of phi node is same type as every data input. 
-- GitLab From d021a24f60efcadc9c91b08e8285f6919e2946c8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 13:32:16 -0500 Subject: [PATCH 038/105] Start typecheck for return --- hercules_ir/src/ir.rs | 14 +++++++++++ hercules_ir/src/typecheck.rs | 46 ++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 1c865b9b..e575d7cb 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -209,6 +209,20 @@ pub enum Node { }, } +impl Node { + pub fn is_return(&self) -> bool { + if let Node::Return { + control: _, + value: _, + } = self + { + true + } else { + false + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum UnaryOperator { Not, diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 007cf2f0..b5fba285 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -1,6 +1,7 @@ use crate::*; use std::collections::HashMap; +use std::iter::zip; use Node::*; @@ -36,6 +37,15 @@ impl TypeSemilattice { false } } + + // During typeflow, the return node is given an error type, even when + // typechecking succeeds. This is done so that any node that uses a return + // node will have its output type set to this error. In the top-level type + // checking function, we ignore this particular error if the node being + // checked is a return node. + fn get_return_type_error() -> Self { + Error(String::from("No node can take a return node as input.")) + } } impl PartialEq for TypeSemilattice { @@ -112,14 +122,30 @@ pub fn typecheck( &mut (function, types, constants, &mut reverse_type_map), ); - // Step 3: convert the individual type lattice values into a list of + // Step 3: add type for empty product. This is the type of the return node. 
+ let empty_prod_ty = Type::Product(Box::new([])); + let empty_prod_id = if let Some(id) = reverse_type_map.get(&empty_prod_ty) { + *id + } else { + let id = TypeID::new(reverse_type_map.len()); + reverse_type_map.insert(empty_prod_ty.clone(), id); + types.push(empty_prod_ty); + id + }; + + // Step 4: convert the individual type lattice values into a list of // concrete type values, or a single error. - result - .into_iter() - .map(|x| match x { + zip(result.into_iter(), function.nodes.iter()) + .map(|(x, n)| match x { Unconstrained => Err(String::from("Found unconstrained type in program.")), Concrete(id) => Ok(id), - Error(msg) => Err(msg), + Error(msg) => { + if n.is_return() && Error(msg.clone()) == TypeSemilattice::get_return_type_error() { + Ok(empty_prod_id) + } else { + Err(msg) + } + } }) .collect() } @@ -353,6 +379,16 @@ fn typeflow( meet } + Return { + control: _, + value: _, + } => { + if inputs.len() != 2 { + return Error(String::from("Return node must have exactly two inputs.")); + } + + todo!() + } _ => todo!(), } } -- GitLab From 3480950e76ef26b6fb920761c4bb25cd217e632d Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 17:58:53 -0500 Subject: [PATCH 039/105] Return node typecheck --- hercules_ir/src/typecheck.rs | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index b5fba285..de7b28c5 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -387,7 +387,42 @@ fn typeflow( return Error(String::from("Return node must have exactly two inputs.")); } - todo!() + // Check type of control input first, since this may produce an + // error. 
+ if let Concrete(id) = inputs[0] { + if let Type::Control(factors) = &types[id.idx()] { + if factors.len() != 0 { + return Error(String::from( + "Return node's control input must have no thread replications.", + )); + } + } else { + return Error(String::from( + "Return node's control input cannot have non-control type.", + )); + } + } else if inputs[0].is_error() { + // If an input has an error lattice value, it must be + // propagated. + return inputs[0].clone(); + } + + if let Concrete(id) = inputs[1] { + if *id != function.return_type { + return Error(String::from("Return node's data input type must be the same as the function's return type.")); + } + } else if inputs[1].is_error() { + return inputs[1].clone(); + } + + // Return nodes are special - they cannot have any users. Thus, we + // set the return node's lattice value to a specific error. When + // converting lattice values to types, this particular error gets + // converted to an empty product type if it's the type of a return + // node. If any node uses a return node, it's lattice value will be + // this error. This will result in a normal error when attempting to + // extract conrete types. + TypeSemilattice::get_return_type_error() } _ => todo!(), } -- GitLab From a573f1f5c631b2a105c8313a1f5bb67531f66c31 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 18 Sep 2023 21:27:53 -0500 Subject: [PATCH 040/105] Parameter node typecheck --- hercules_ir/src/typecheck.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index de7b28c5..21d22883 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -424,6 +424,19 @@ fn typeflow( // extract conrete types. 
TypeSemilattice::get_return_type_error() } + Parameter { index } => { + if inputs.len() != 0 { + return Error(String::from("Parameter node must have zero inputs.")); + } + + if *index >= function.param_types.len() { + return Error(String::from("Parameter node must reference an index corresponding to an existing function argument.")); + } + + let param_id = function.param_types[*index]; + + TypeSemilattice::Concrete(param_id) + } _ => todo!(), } } -- GitLab From 5a94d9bbbb64c802e2bc34f67d7c14d9e8fa984e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 09:19:52 -0500 Subject: [PATCH 041/105] Constant node typecheck --- hercules_ir/src/typecheck.rs | 72 ++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 21d22883..1ed9b438 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -3,8 +3,6 @@ use crate::*; use std::collections::HashMap; use std::iter::zip; -use Node::*; - use self::TypeSemilattice::*; /* @@ -163,7 +161,7 @@ fn typeflow( ), id: NodeID, ) -> TypeSemilattice { - let (function, types, constant, reverse_type_map) = auxiliary; + let (function, types, constants, reverse_type_map) = auxiliary; // Whenever we want to reference a specific type (for example, for the // start node), we need to get its type ID. This helper function gets the @@ -185,7 +183,7 @@ fn typeflow( // large match statement. Oh well. Each arm returns the lattice value for // the "out" type of the node. 
match &function.nodes[id.idx()] { - Start => { + Node::Start => { if inputs.len() != 0 { return Error(String::from("Start node must have zero inputs.")); } @@ -197,7 +195,7 @@ fn typeflow( reverse_type_map, )) } - Region { preds: _ } => { + Node::Region { preds: _ } => { if inputs.len() == 0 { return Error(String::from("Region node must have at least one input.")); } @@ -221,7 +219,7 @@ fn typeflow( meet } - If { + Node::If { control: _, cond: _, } => { @@ -256,7 +254,7 @@ fn typeflow( inputs[0].clone() } - Fork { control: _, factor } => { + Node::Fork { control: _, factor } => { if inputs.len() != 1 { return Error(String::from("Fork node must have exactly one input.")); } @@ -288,7 +286,7 @@ fn typeflow( inputs[0].clone() } - Join { + Node::Join { control: _, data: _, } => { @@ -342,7 +340,7 @@ fn typeflow( inputs[1].clone() } - Phi { + Node::Phi { control: _, data: _, } => { @@ -379,7 +377,7 @@ fn typeflow( meet } - Return { + Node::Return { control: _, value: _, } => { @@ -424,7 +422,7 @@ fn typeflow( // extract conrete types. 
TypeSemilattice::get_return_type_error() } - Parameter { index } => { + Node::Parameter { index } => { if inputs.len() != 0 { return Error(String::from("Parameter node must have zero inputs.")); } @@ -435,7 +433,57 @@ fn typeflow( let param_id = function.param_types[*index]; - TypeSemilattice::Concrete(param_id) + Concrete(param_id) + } + Node::Constant { id } => { + if inputs.len() != 0 { + return Error(String::from("Constant node must have zero inputs.")); + } + + match constants[id.idx()] { + Constant::Boolean(_) => { + Concrete(get_type_id(Type::Boolean, types, reverse_type_map)) + } + Constant::Integer8(_) => { + Concrete(get_type_id(Type::Integer8, types, reverse_type_map)) + } + Constant::Integer16(_) => { + Concrete(get_type_id(Type::Integer16, types, reverse_type_map)) + } + Constant::Integer32(_) => { + Concrete(get_type_id(Type::Integer32, types, reverse_type_map)) + } + Constant::Integer64(_) => { + Concrete(get_type_id(Type::Integer64, types, reverse_type_map)) + } + Constant::UnsignedInteger8(_) => { + Concrete(get_type_id(Type::UnsignedInteger8, types, reverse_type_map)) + } + Constant::UnsignedInteger16(_) => Concrete(get_type_id( + Type::UnsignedInteger16, + types, + reverse_type_map, + )), + Constant::UnsignedInteger32(_) => Concrete(get_type_id( + Type::UnsignedInteger32, + types, + reverse_type_map, + )), + Constant::UnsignedInteger64(_) => Concrete(get_type_id( + Type::UnsignedInteger64, + types, + reverse_type_map, + )), + Constant::Float32(_) => { + Concrete(get_type_id(Type::Float32, types, reverse_type_map)) + } + Constant::Float64(_) => { + Concrete(get_type_id(Type::Float64, types, reverse_type_map)) + } + Constant::Product(id, _) => Concrete(id), + Constant::Summation(id, _, _) => Concrete(id), + Constant::Array(id, _) => Concrete(id), + } } _ => todo!(), } -- GitLab From 341ef2172d8cb08f047ad33abc3095dbdab2c2b9 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 09:23:21 -0500 Subject: [PATCH 
042/105] DynamicConstant node typecheck --- hercules_ir/src/typecheck.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 1ed9b438..92b0e90c 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -485,6 +485,17 @@ fn typeflow( Constant::Array(id, _) => Concrete(id), } } + Node::DynamicConstant { id: _ } => { + if inputs.len() != 0 { + return Error(String::from("DynamicConstant node must have zero inputs.")); + } + + Concrete(get_type_id( + Type::UnsignedInteger64, + types, + reverse_type_map, + )) + } _ => todo!(), } } -- GitLab From 5b3c2e9175d77d2b81da4c347d511b688b0e02c9 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 10:03:32 -0500 Subject: [PATCH 043/105] Unary node typecheck --- hercules_ir/src/ir.rs | 33 +++++++++++++++++++++++++++++++++ hercules_ir/src/typecheck.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index e575d7cb..5e871bbe 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -75,6 +75,36 @@ impl Type { pub fn is_bool(&self) -> bool { self == &Type::Boolean } + + pub fn is_fixed(&self) -> bool { + match self { + Type::Integer8 => true, + Type::Integer16 => true, + Type::Integer32 => true, + Type::Integer64 => true, + Type::UnsignedInteger8 => true, + Type::UnsignedInteger16 => true, + Type::UnsignedInteger32 => true, + Type::UnsignedInteger64 => true, + _ => false, + } + } + + pub fn is_float(&self) -> bool { + match self { + Type::Float32 => true, + Type::Float64 => true, + _ => false, + } + } + + pub fn is_arithmetic(&self) -> bool { + self.is_fixed() || self.is_float() + } + + pub fn is_primitive(&self) -> bool { + self.is_bool() || self.is_fixed() || self.is_float() + } } /* @@ -243,6 +273,9 @@ pub enum BinaryOperator { GTE, EQ, NE, + Or, + And, + Xor, LSh, RSh, } diff --git 
a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 92b0e90c..634f03af 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -496,6 +496,39 @@ fn typeflow( reverse_type_map, )) } + Node::Unary { input: _, op } => { + if inputs.len() != 1 { + return Error(String::from("Unary node must have exactly one input.")); + } + + if let Concrete(id) = inputs[0] { + match op { + UnaryOperator::Not => { + if !types[id.idx()].is_bool() { + return Error(String::from( + "Not unary node input cannot have non-boolean type.", + )); + } + } + UnaryOperator::Neg => { + if !types[id.idx()].is_arithmetic() { + return Error(String::from( + "Neg unary node input cannot have non-arithmetic type.", + )); + } + } + UnaryOperator::Bitflip => { + if !types[id.idx()].is_fixed() { + return Error(String::from( + "Bitflip unary node input cannot have non-fixed type.", + )); + } + } + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 0f022aa1288adc6e716cf992238ad3e9c8517437 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 12:17:45 -0500 Subject: [PATCH 044/105] Binary node typecheck --- hercules_ir/src/dot.rs | 3 ++ hercules_ir/src/parse.rs | 3 ++ hercules_ir/src/typecheck.rs | 63 ++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 3aa4326b..d98c973b 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -342,6 +342,9 @@ fn get_string_bop_kind(bop: BinaryOperator) -> &'static str { BinaryOperator::GTE => "gte", BinaryOperator::EQ => "eq", BinaryOperator::NE => "ne", + BinaryOperator::Or => "or", + BinaryOperator::And => "and", + BinaryOperator::Xor => "xor", BinaryOperator::LSh => "lsh", BinaryOperator::RSh => "rsh", } diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index d86038b0..5c28984b 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -294,6 +294,9 @@ fn 
parse_node<'a>( "gte" => parse_binary(ir_text, context, BinaryOperator::GTE)?, "eq" => parse_binary(ir_text, context, BinaryOperator::EQ)?, "ne" => parse_binary(ir_text, context, BinaryOperator::NE)?, + "or" => parse_binary(ir_text, context, BinaryOperator::Or)?, + "and" => parse_binary(ir_text, context, BinaryOperator::And)?, + "xor" => parse_binary(ir_text, context, BinaryOperator::Xor)?, "lsh" => parse_binary(ir_text, context, BinaryOperator::LSh)?, "rsh" => parse_binary(ir_text, context, BinaryOperator::RSh)?, "call" => parse_call(ir_text, context)?, diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 634f03af..359cb1aa 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -529,6 +529,69 @@ fn typeflow( inputs[0].clone() } + Node::Binary { + left: _, + right: _, + op, + } => { + if inputs.len() != 1 { + return Error(String::from("Binary node must have exactly two inputs.")); + } + + let input_ty = TypeSemilattice::meet(inputs[0], inputs[1]); + + if let Concrete(id) = input_ty { + match op { + BinaryOperator::Add + | BinaryOperator::Sub + | BinaryOperator::Mul + | BinaryOperator::Div + | BinaryOperator::Rem => { + if !types[id.idx()].is_arithmetic() { + return Error(format!( + "{:?} binary node input cannot have non-arithmetic type.", + op, + )); + } + } + BinaryOperator::LT + | BinaryOperator::LTE + | BinaryOperator::GT + | BinaryOperator::GTE => { + if !types[id.idx()].is_arithmetic() { + return Error(format!( + "{:?} binary node input cannot have non-arithmetic type.", + op, + )); + } + return Concrete(get_type_id(Type::Boolean, types, reverse_type_map)); + } + BinaryOperator::EQ | BinaryOperator::NE => { + if types[id.idx()].is_control() { + return Error(format!( + "{:?} binary node input cannot have control type.", + op, + )); + } + return Concrete(get_type_id(Type::Boolean, types, reverse_type_map)); + } + BinaryOperator::Or + | BinaryOperator::And + | BinaryOperator::Xor + | BinaryOperator::LSh + | 
BinaryOperator::RSh => { + if !types[id.idx()].is_fixed() { + return Error(format!( + "{:?} binary node input cannot have non-fixed type.", + op, + )); + } + } + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 3564d2fab10de0eb1ed2adfd71a45c3129bbe486 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 12:28:44 -0500 Subject: [PATCH 045/105] Commenting and fixes --- hercules_ir/src/typecheck.rs | 61 +++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 359cb1aa..52de1e20 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -99,7 +99,8 @@ impl Semilattice for TypeSemilattice { pub fn typecheck( function: &Function, types: &mut Vec<Type>, - constants: &Vec<ir::Constant>, + constants: &Vec<Constant>, + dynamic_constants: &Vec<DynamicConstant>, reverse_post_order: &Vec<NodeID>, ) -> Result<Vec<TypeID>, String> { // Step 1: assemble a reverse type map. This is needed to get or create the @@ -117,7 +118,13 @@ pub fn typecheck( function, reverse_post_order, typeflow, - &mut (function, types, constants, &mut reverse_type_map), + &mut ( + function, + types, + constants, + dynamic_constants, + &mut reverse_type_map, + ), ); // Step 3: add type for empty product. This is the type of the return node. @@ -156,12 +163,13 @@ fn typeflow( auxiliary: &mut ( &Function, &mut Vec<Type>, - &Vec<ir::Constant>, + &Vec<Constant>, + &Vec<DynamicConstant>, &mut HashMap<Type, TypeID>, ), id: NodeID, ) -> TypeSemilattice { - let (function, types, constants, reverse_type_map) = auxiliary; + let (function, types, constants, dynamic_constants, reverse_type_map) = auxiliary; // Whenever we want to reference a specific type (for example, for the // start node), we need to get its type ID. 
This helper function gets the @@ -431,6 +439,7 @@ fn typeflow( return Error(String::from("Parameter node must reference an index corresponding to an existing function argument.")); } + // Type of parameter is stored directly in function. let param_id = function.param_types[*index]; Concrete(param_id) @@ -440,6 +449,7 @@ fn typeflow( return Error(String::from("Constant node must have zero inputs.")); } + // Most constants' type are obvious. match constants[id.idx()] { Constant::Boolean(_) => { Concrete(get_type_id(Type::Boolean, types, reverse_type_map)) @@ -480,9 +490,45 @@ fn typeflow( Constant::Float64(_) => { Concrete(get_type_id(Type::Float64, types, reverse_type_map)) } - Constant::Product(id, _) => Concrete(id), - Constant::Summation(id, _, _) => Concrete(id), - Constant::Array(id, _) => Concrete(id), + // Product, summation, and array constants are exceptions. + // Technically, only summation constants need to explicitly + // store their type, but product and array constants also + // explicitly store their type specifically to make this code + // simpler (although their type could be derived from the + // constant itself). + Constant::Product(id, _) => { + if let Type::Product(_) = types[id.idx()] { + Concrete(id) + } else { + Error(String::from( + "Product constant must store an explicit product type.", + )) + } + } + Constant::Summation(id, _, _) => { + if let Type::Summation(_) = types[id.idx()] { + Concrete(id) + } else { + Error(String::from( + "Summation constant must store an explicit summation type.", + )) + } + } + // Array typechecking also consists of validating the number of constant elements. 
+ Constant::Array(id, ref elems) => { + if let Type::Array(_, dc_id) = types[id.idx()] { + if dynamic_constants[dc_id.idx()] == DynamicConstant::Constant(elems.len()) + { + Concrete(id) + } else { + Error(String::from("Array constant must have the correct number of constant elements as specified by its type.")) + } + } else { + Error(String::from( + "Array constant must store an explicit array type.", + )) + } + } } } Node::DynamicConstant { id: _ } => { @@ -490,6 +536,7 @@ fn typeflow( return Error(String::from("DynamicConstant node must have zero inputs.")); } + // Dynamic constants are always u64. Concrete(get_type_id( Type::UnsignedInteger64, types, -- GitLab From 50b119889b802fc74914cfb45aaaaa84498d2a76 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 13:21:09 -0500 Subject: [PATCH 046/105] Call node typecheck --- hercules_ir/src/typecheck.rs | 50 +++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 52de1e20..5637c695 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -98,6 +98,7 @@ impl Semilattice for TypeSemilattice { */ pub fn typecheck( function: &Function, + functions: &Vec<Function>, types: &mut Vec<Type>, constants: &Vec<Constant>, dynamic_constants: &Vec<DynamicConstant>, @@ -120,6 +121,7 @@ pub fn typecheck( typeflow, &mut ( function, + functions, types, constants, dynamic_constants, @@ -162,6 +164,7 @@ fn typeflow( inputs: &[&TypeSemilattice], auxiliary: &mut ( &Function, + &Vec<Function>, &mut Vec<Type>, &Vec<Constant>, &Vec<DynamicConstant>, @@ -169,7 +172,7 @@ fn typeflow( ), id: NodeID, ) -> TypeSemilattice { - let (function, types, constants, dynamic_constants, reverse_type_map) = auxiliary; + let (function, functions, types, constants, dynamic_constants, reverse_type_map) = auxiliary; // Whenever we want to reference a specific type (for example, for the // start 
node), we need to get its type ID. This helper function gets the @@ -611,6 +614,8 @@ fn typeflow( op, )); } + + // Comparison operators change the input type. return Concrete(get_type_id(Type::Boolean, types, reverse_type_map)); } BinaryOperator::EQ | BinaryOperator::NE => { @@ -620,6 +625,8 @@ fn typeflow( op, )); } + + // Equality operators potentially change the input type. return Concrete(get_type_id(Type::Boolean, types, reverse_type_map)); } BinaryOperator::Or @@ -639,6 +646,47 @@ fn typeflow( inputs[0].clone() } + Node::Call { + function: callee_id, + dynamic_constants: dc_args, + args: _, + } => { + let callee = &functions[callee_id.idx()]; + + // Check number of run-time arguments. + if inputs.len() != callee.param_types.len() { + return Error(format!( + "Call node has {} inputs, but calls a function with {} parameters.", + inputs.len(), + callee.param_types.len(), + )); + } + + // Check number of dynamic constant arguments. + if dc_args.len() != callee.num_dynamic_constants as usize { + return Error(format!( + "Call node references {} dynamic constants, but calls a function expecting {} dynamic constants.", + dc_args.len(), + callee.num_dynamic_constants + )); + } + + // Check argument types. + for (input, param_ty) in zip(inputs.iter(), callee.param_types.iter()) { + if let Concrete(input_id) = input { + if input_id != param_ty { + return Error(String::from( + "Call node mismatches argument types with callee function.", + )); + } + } else if input.is_error() { + // If an input type is an error, we must propagate it. 
+ return (*input).clone(); + } + } + + Concrete(callee.return_type) + } _ => todo!(), } } -- GitLab From 50ada5bd414ad463e4397a9d9c6a3e996bf714bc Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 13:28:40 -0500 Subject: [PATCH 047/105] ReadProd node typecheck --- hercules_ir/src/typecheck.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 5637c695..a404b50c 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -687,6 +687,28 @@ fn typeflow( Concrete(callee.return_type) } + Node::ReadProd { prod: _, index } => { + if inputs.len() != 1 { + return Error(String::from("ReadProd node must have exactly one input.")); + } + + if let Concrete(id) = inputs[0] { + if let Type::Product(elem_tys) = &types[id.idx()] { + if *index >= elem_tys.len() { + // ReadProd's index being out of range is a type error. + return Error(String::from("ReadProd node's index must be within range of input product type's element list.")); + } else { + return Concrete(elem_tys[*index]); + } + } else { + return Error(String::from( + "ReadProd node's input type must be a product type.", + )); + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 484d413406fee5d814ee1a5d13e5c2368c8f1fed Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 13:37:26 -0500 Subject: [PATCH 048/105] WriteProd node typecheck --- hercules_ir/src/def_use.rs | 6 +++--- hercules_ir/src/typecheck.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 5c808038..8810112e 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -115,9 +115,9 @@ pub fn get_uses<'a>(node: &'a Node) -> NodeUses<'a> { NodeUses::Phi(uses.into_boxed_slice()) } Node::Return { control, value } => NodeUses::Two([*control, 
*value]), - Node::Parameter { index: _ } => todo!(), - Node::Constant { id: _ } => todo!(), - Node::DynamicConstant { id: _ } => todo!(), + Node::Parameter { index: _ } => NodeUses::Zero, + Node::Constant { id: _ } => NodeUses::Zero, + Node::DynamicConstant { id: _ } => NodeUses::Zero, Node::Unary { input, op: _ } => NodeUses::One([*input]), Node::Binary { left, right, op: _ } => NodeUses::Two([*left, *right]), Node::Call { diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index a404b50c..000cb676 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -692,6 +692,7 @@ fn typeflow( return Error(String::from("ReadProd node must have exactly one input.")); } + // If the input type isn't concrete, just propagate input type. if let Concrete(id) = inputs[0] { if let Type::Product(elem_tys) = &types[id.idx()] { if *index >= elem_tys.len() { @@ -709,6 +710,40 @@ fn typeflow( inputs[0].clone() } + Node::WriteProd { + prod: _, + data: _, + index, + } => { + if inputs.len() != 2 { + return Error(String::from("WriteProd node must have exactly two inputs.")); + } + + // If the input type isn't concrete, just propagate input type. + if let Concrete(id) = inputs[0] { + if let Type::Product(elem_tys) = &types[id.idx()] { + if *index >= elem_tys.len() { + // ReadProd's index being out of range is a type error. + return Error(String::from("WriteProd node's index must be within range of input product type's element list.")); + } else if let Concrete(data_id) = inputs[1] { + if elem_tys[*index] != *data_id { + return Error(format!("WriteProd node's data input doesn't match the type of the element at index {} inside the product type.", index)); + } + } else if inputs[1].is_error() { + // If an input lattice value is an error, we must + // propagate it. 
+ return inputs[1].clone(); + } + return Concrete(elem_tys[*index]); + } else { + return Error(String::from( + "WriteProd node's input type must be a product type.", + )); + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 6ee8dd8ceb7e65a54c133ceb134bb33506b9cc01 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 19 Sep 2023 14:19:24 -0500 Subject: [PATCH 049/105] Add ExtractSum node --- hercules_ir/src/def_use.rs | 1 + hercules_ir/src/dot.rs | 10 ++++++++++ hercules_ir/src/ir.rs | 4 ++++ hercules_ir/src/parse.rs | 11 +++++++++++ 4 files changed, 26 insertions(+) diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 8810112e..8fd0a2b2 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -139,5 +139,6 @@ pub fn get_uses<'a>(node: &'a Node) -> NodeUses<'a> { sum_ty: _, variant: _, } => NodeUses::One([*data]), + Node::ExtractSum { data, variant: _ } => NodeUses::One([*data]), } } diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index d98c973b..16166120 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -257,6 +257,12 @@ fn write_node<W: std::fmt::Write>( write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited } + Node::ExtractSum { data, variant } => { + write!(w, "{} [label=\"extract_sum({})\"];\n", name, variant)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; + visited + } }; Ok((visited.get(&id).unwrap().clone(), visited)) } @@ -318,6 +324,10 @@ fn get_string_node_kind(node: &Node) -> &'static str { sum_ty: _, variant: _, } => "build_sum", + Node::ExtractSum { + data: _, + variant: _, + } => "extract_sum", } } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 5e871bbe..2d9ca407 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -237,6 +237,10 @@ pub enum Node { sum_ty: TypeID, variant: usize, }, + ExtractSum { + 
data: NodeID, + variant: usize, + }, } impl Node { diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 5c28984b..a54f8109 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -306,6 +306,7 @@ fn parse_node<'a>( "write_array" => parse_write_array(ir_text, context)?, "match" => parse_match(ir_text, context)?, "build_sum" => parse_build_sum(ir_text, context)?, + "extract_sum" => parse_extract_sum(ir_text, context)?, _ => Err(nom::Err::Error(nom::error::Error { input: ir_text, code: nom::error::ErrorKind::IsNot, @@ -589,6 +590,16 @@ fn parse_build_sum<'a>( )) } +fn parse_extract_sum<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (data, variant)) = + parse_tuple2(parse_identifier, |x| parse_prim::<usize>(x, "1234567890"))(ir_text)?; + let data = context.borrow_mut().get_node_id(data); + Ok((ir_text, Node::ExtractSum { data, variant })) +} + fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Type> { // Parser combinators are very convenient, if a bit hard to read. 
let ir_text = nom::character::complete::multispace0(ir_text)?.0; -- GitLab From f3c753b6748e90597aec9f6ba973c6c0c9d152f5 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 13:42:08 -0500 Subject: [PATCH 050/105] ReadArray node typecheck --- hercules_ir/src/ir.rs | 10 ++++++++++ hercules_ir/src/typecheck.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 2d9ca407..1abbb3f8 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -76,6 +76,16 @@ impl Type { self == &Type::Boolean } + pub fn is_unsigned(&self) -> bool { + match self { + Type::UnsignedInteger8 => true, + Type::UnsignedInteger16 => true, + Type::UnsignedInteger32 => true, + Type::UnsignedInteger64 => true, + _ => false, + } + } + pub fn is_fixed(&self) -> bool { match self { Type::Integer8 => true, diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 000cb676..3febda07 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -744,6 +744,35 @@ fn typeflow( inputs[0].clone() } + Node::ReadArray { array: _, index: _ } => { + if inputs.len() != 2 { + return Error(String::from("ReadArray node must have exactly two inputs.")); + } + + // Check that index has unsigned type. + if let Concrete(id) = inputs[1] { + if !types[id.idx()].is_unsigned() { + return Error(String::from( + "ReadyArray node's index input must have unsigned type.", + )); + } + } else if inputs[1].is_error() { + return inputs[1].clone(); + } + + // If array input is concrete, we can get type of ReadArray node. 
+ if let Concrete(id) = inputs[0] { + if let Type::Array(elem_id, _) = types[id.idx()] { + return Concrete(elem_id); + } else { + return Error(String::from( + "ReadyArray node's array input must have array type.", + )); + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 124921f658671a218c13a07d7e30fa91260361c4 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 13:59:13 -0500 Subject: [PATCH 051/105] WriteArray node typecheck --- hercules_ir/src/typecheck.rs | 46 ++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 3febda07..b1f224cb 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -753,7 +753,7 @@ fn typeflow( if let Concrete(id) = inputs[1] { if !types[id.idx()].is_unsigned() { return Error(String::from( - "ReadyArray node's index input must have unsigned type.", + "ReadArray node's index input must have unsigned type.", )); } } else if inputs[1].is_error() { @@ -766,13 +766,55 @@ fn typeflow( return Concrete(elem_id); } else { return Error(String::from( - "ReadyArray node's array input must have array type.", + "ReadArray node's array input must have array type.", )); } } inputs[0].clone() } + Node::WriteArray { + array: _, + data: _, + index: _, + } => { + if inputs.len() != 3 { + return Error(String::from("WriteArray node must have exactly 3 inputs.")); + } + + // Check that index has unsigned type. + if let Concrete(id) = inputs[2] { + if !types[id.idx()].is_unsigned() { + return Error(String::from( + "WriteArray node's index input must have unsigned type.", + )); + } + } else if inputs[2].is_error() { + return inputs[2].clone(); + } + + // Check that array and data types match. 
+ if let Concrete(array_id) = inputs[0] { + if let Type::Array(elem_id, _) = types[array_id.idx()] { + if let Concrete(data_id) = inputs[1] { + if elem_id != *data_id { + return Error(String::from("WriteArray node's array and data inputs must have compatible types (type of data input must be the same as the array input's element type).")); + } + } + } else { + return Error(String::from( + "WriteArray node's array input must have array type.", + )); + } + } + + // If an input type is an error, we must propagate it. + if inputs[1].is_error() { + return inputs[1].clone(); + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 85d37a622fb2635017e6130b1c5ce18266e58c0f Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 14:06:24 -0500 Subject: [PATCH 052/105] Match node typecheck --- hercules_ir/src/typecheck.rs | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index b1f224cb..cdc1ce05 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -815,6 +815,45 @@ fn typeflow( inputs[0].clone() } + Node::Match { control: _, sum: _ } => { + if inputs.len() != 2 { + return Error(String::from("Match node must have exactly two inputs.")); + } + + // Check sum and control inputs in if nest, since both need to be + // concrete to determine a concrete type for a match node. + if let Concrete(id) = inputs[1] { + if let Type::Summation(variants) = &types[id.idx()] { + if let Concrete(id) = inputs[0] { + if !types[id.idx()].is_control() { + Error(String::from( + "Match node's control input cannot have non-control type.", + )) + } else { + let out_ty = + Type::Product(vec![*id; variants.len()].into_boxed_slice()); + Concrete(get_type_id(out_ty, types, reverse_type_map)) + } + } else if inputs[0].is_error() { + // If an input has an error lattice value, it must be + // propagated. 
+ inputs[0].clone() + } else { + TypeSemilattice::Unconstrained + } + } else { + Error(String::from( + "Match node's condition input cannot have non-sum type.", + )) + } + } else if inputs[1].is_error() { + // If an input has an error lattice value, it must be + // propagated. + inputs[1].clone() + } else { + TypeSemilattice::Unconstrained + } + } _ => todo!(), } } -- GitLab From a5cf54661d1a70fa94367c684317e1cae38ca46b Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 14:15:44 -0500 Subject: [PATCH 053/105] Fix match node typecheck --- hercules_ir/src/typecheck.rs | 42 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index cdc1ce05..f5a7002f 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -822,36 +822,28 @@ fn typeflow( // Check sum and control inputs in if nest, since both need to be // concrete to determine a concrete type for a match node. - if let Concrete(id) = inputs[1] { - if let Type::Summation(variants) = &types[id.idx()] { - if let Concrete(id) = inputs[0] { - if !types[id.idx()].is_control() { - Error(String::from( - "Match node's control input cannot have non-control type.", - )) - } else { - let out_ty = - Type::Product(vec![*id; variants.len()].into_boxed_slice()); - Concrete(get_type_id(out_ty, types, reverse_type_map)) - } - } else if inputs[0].is_error() { - // If an input has an error lattice value, it must be - // propagated. 
- inputs[0].clone() + if let (Concrete(control_id), Concrete(sum_id)) = (inputs[0], inputs[1]) { + if let Type::Summation(variants) = &types[sum_id.idx()] { + if !types[control_id.idx()].is_control() { + return Error(String::from( + "Match node's control input cannot have non-control type.", + )); } else { - TypeSemilattice::Unconstrained + let out_ty = + Type::Product(vec![*control_id; variants.len()].into_boxed_slice()); + return Concrete(get_type_id(out_ty, types, reverse_type_map)); } } else { - Error(String::from( + return Error(String::from( "Match node's condition input cannot have non-sum type.", - )) + )); } - } else if inputs[1].is_error() { - // If an input has an error lattice value, it must be - // propagated. - inputs[1].clone() - } else { - TypeSemilattice::Unconstrained + } + + match TypeSemilattice::meet(inputs[0], inputs[1]) { + TypeSemilattice::Unconstrained => TypeSemilattice::Unconstrained, + TypeSemilattice::Concrete(_) => TypeSemilattice::Unconstrained, + TypeSemilattice::Error(msg) => TypeSemilattice::Error(msg), } } _ => todo!(), -- GitLab From 8da6154f9e923f6b565484bdb08930e9200bb435 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 14:32:35 -0500 Subject: [PATCH 054/105] Update comments --- hercules_ir/src/typecheck.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index f5a7002f..8a4388d8 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -820,8 +820,8 @@ fn typeflow( return Error(String::from("Match node must have exactly two inputs.")); } - // Check sum and control inputs in if nest, since both need to be - // concrete to determine a concrete type for a match node. + // Check sum and control inputs simultaneously, since both need to + // be concrete to determine a concrete type for a match node. 
if let (Concrete(control_id), Concrete(sum_id)) = (inputs[0], inputs[1]) { if let Type::Summation(variants) = &types[sum_id.idx()] { if !types[control_id.idx()].is_control() { @@ -840,6 +840,7 @@ fn typeflow( } } + // Otherwise, currently unconstrained, or an error. match TypeSemilattice::meet(inputs[0], inputs[1]) { TypeSemilattice::Unconstrained => TypeSemilattice::Unconstrained, TypeSemilattice::Concrete(_) => TypeSemilattice::Unconstrained, -- GitLab From b6e884574500cecb6e531c8b7c64bfba4333914e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 16:38:57 -0500 Subject: [PATCH 055/105] BuildSum node typecheck --- hercules_ir/src/typecheck.rs | 44 ++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 8a4388d8..861ee171 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -16,18 +16,6 @@ enum TypeSemilattice { } impl TypeSemilattice { - fn is_unconstrained(&self) -> bool { - self == &Unconstrained - } - - fn is_concrete(&self) -> bool { - if let Concrete(_) = self { - true - } else { - false - } - } - fn is_error(&self) -> bool { if let Error(_) = self { true @@ -847,6 +835,38 @@ fn typeflow( TypeSemilattice::Error(msg) => TypeSemilattice::Error(msg), } } + Node::BuildSum { + data: _, + sum_ty, + variant, + } => { + if inputs.len() != 1 { + return Error(String::from("BuildSum node must have exactly one input.")); + } + + if let Concrete(id) = inputs[0] { + // BuildSum node stores its own result type. + if let Type::Summation(variants) = &types[sum_ty.idx()] { + // Must reference an existing variant. + if *variant >= variants.len() { + return Error(String::from("BuildSum node's variant number must be in range of valid variant numbers for referenced sum type.")); + } + + // The variant type has to be the same as the type of data. 
+ if *id == variants[*variant] { + return Error(String::from( + "BuildSum node's input type must match the referenced variant type.", + )); + } + + return Concrete(*sum_ty); + } else { + return Error(String::from("BuildSum node must reference a sum type.")); + } + } + + inputs[0].clone() + } _ => todo!(), } } -- GitLab From 3c4336cf96f66306cc503145c516a22d69f774ff Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 16:42:58 -0500 Subject: [PATCH 056/105] ExtractSum node typecheck --- hercules_ir/src/typecheck.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 861ee171..2c31c454 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -867,6 +867,27 @@ fn typeflow( inputs[0].clone() } - _ => todo!(), + Node::ExtractSum { data: _, variant } => { + if inputs.len() != 1 { + return Error(String::from("ExtractSum node must have exactly one input.")); + } + + if let Concrete(id) = inputs[0] { + if let Type::Summation(variants) = &types[id.idx()] { + // Must reference an existing variant. 
+ if *variant >= variants.len() { + return Error(String::from("BuildSum node's variant number must be in range of valid variant numbers for referenced sum type.")); + } + + return Concrete(variants[*variant]); + } else { + return Error(String::from( + "ExtractSum node's input cannot have non-sum type.", + )); + } + } + + inputs[0].clone() + } } } -- GitLab From 9e0d37ee34c3a47f4e306e20ead5b60defd48cde Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 17:12:01 -0500 Subject: [PATCH 057/105] Refactor dataflow API --- hercules_ir/src/dataflow.rs | 30 +++++++------- hercules_ir/src/typecheck.rs | 78 ++++++++++++++++++------------------ 2 files changed, 55 insertions(+), 53 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 9c34f635..6a2b0e1d 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -17,21 +17,21 @@ pub trait Semilattice: Eq { /* * Top level dataflow function. This routine is slightly more generic than the * typical textbook definition. The flow function takes an ordered slice of - * predecessor lattice values, rather a single lattice value. Thus, the flow - * function can perform non-associative operations on the "in" lattice values. - * This makes this routine useful for some analyses, such as typechecking. To - * perform the typical behavior, the flow function should start by meeting the - * input lattice values into a single lattice value. + * predecessor lattice values, rather than a single lattice value. Thus, the + * flow function can perform non-associative and non-commutative operations on + * the "in" lattice values. This makes this routine more useful for some + * analyses, such as typechecking. To perform the typical behavior, the flow + * function should start by meeting the input lattice values into a single + * lattice value. 
*/ -pub fn dataflow<L, F, D>( +pub fn dataflow<L, F>( function: &Function, reverse_post_order: &Vec<NodeID>, - flow_function: F, - auxiliary_data: &mut D, + mut flow_function: F, ) -> Vec<L> where L: Semilattice, - F: Fn(&[&L], &mut D, NodeID) -> L, + F: FnMut(&[&L], &Node) -> L, { // Step 1: create initial set of "in" points. The start node is initialized // to bottom, and everything else is initialized to top. @@ -43,7 +43,7 @@ where let mut outs: Vec<L> = ins .into_iter() .enumerate() - .map(|(id, l)| flow_function(&[&l], auxiliary_data, NodeID::new(id))) + .map(|(id, l)| flow_function(&[&l], &function.nodes[id])) .collect(); // Step 3: compute NodeUses for each node in function. @@ -54,25 +54,25 @@ where let mut change = false; // Iterate nodes in reverse post order. - for node in reverse_post_order { + for node_id in reverse_post_order { // Assemble the "out" values of the predecessors of this node. This // vector's definition is hopefully LICMed out, so that we don't do // an allocation per node. This can't be done manually because of // Rust's ownership rules (in particular, pred_outs holds a // reference to a value inside outs, which is mutated below). let mut pred_outs = vec![]; - for u in uses[node.idx()].as_ref() { + for u in uses[node_id.idx()].as_ref() { pred_outs.push(&outs[u.idx()]); } // Compute new "out" value from predecessor "out" values. - let new_out = flow_function(&pred_outs[..], auxiliary_data, *node); - if outs[node.idx()] != new_out { + let new_out = flow_function(&pred_outs[..], &function.nodes[node_id.idx()]); + if outs[node_id.idx()] != new_out { change = true; } // Update outs vector. 
- outs[node.idx()] = new_out; + outs[node_id.idx()] = new_out; } // If no lattice value changed, we've reached the maximum fixed point diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 2c31c454..ccd7441f 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -85,7 +85,7 @@ impl Semilattice for TypeSemilattice { * Top level typecheck function. */ pub fn typecheck( - function: &Function, + function_id: FunctionID, functions: &Vec<Function>, types: &mut Vec<Type>, constants: &Vec<Constant>, @@ -104,17 +104,20 @@ pub fn typecheck( // function performs a non-associative operation on the predecessor "out" // values. let result = dataflow( - function, + &functions[function_id.idx()], reverse_post_order, - typeflow, - &mut ( - function, - functions, - types, - constants, - dynamic_constants, - &mut reverse_type_map, - ), + |inputs, id| { + typeflow( + inputs, + id, + function_id, + functions, + types, + constants, + dynamic_constants, + &mut reverse_type_map, + ) + }, ); // Step 3: add type for empty product. This is the type of the return node. @@ -130,19 +133,22 @@ pub fn typecheck( // Step 4: convert the individual type lattice values into a list of // concrete type values, or a single error. 
- zip(result.into_iter(), function.nodes.iter()) - .map(|(x, n)| match x { - Unconstrained => Err(String::from("Found unconstrained type in program.")), - Concrete(id) => Ok(id), - Error(msg) => { - if n.is_return() && Error(msg.clone()) == TypeSemilattice::get_return_type_error() { - Ok(empty_prod_id) - } else { - Err(msg) - } + zip( + result.into_iter(), + functions[function_id.idx()].nodes.iter(), + ) + .map(|(x, n)| match x { + Unconstrained => Err(String::from("Found unconstrained type in program.")), + Concrete(id) => Ok(id), + Error(msg) => { + if n.is_return() && Error(msg.clone()) == TypeSemilattice::get_return_type_error() { + Ok(empty_prod_id) + } else { + Err(msg) } - }) - .collect() + } + }) + .collect() } /* @@ -150,18 +156,14 @@ pub fn typecheck( */ fn typeflow( inputs: &[&TypeSemilattice], - auxiliary: &mut ( - &Function, - &Vec<Function>, - &mut Vec<Type>, - &Vec<Constant>, - &Vec<DynamicConstant>, - &mut HashMap<Type, TypeID>, - ), - id: NodeID, + node: &Node, + function_id: FunctionID, + functions: &Vec<Function>, + types: &mut Vec<Type>, + constants: &Vec<Constant>, + dynamic_constants: &Vec<DynamicConstant>, + reverse_type_map: &mut HashMap<Type, TypeID>, ) -> TypeSemilattice { - let (function, functions, types, constants, dynamic_constants, reverse_type_map) = auxiliary; - // Whenever we want to reference a specific type (for example, for the // start node), we need to get its type ID. This helper function gets the // ID if it already exists. If the type doesn't already exist, the helper @@ -181,7 +183,7 @@ fn typeflow( // Each node requires different type logic. This unfortunately results in a // large match statement. Oh well. Each arm returns the lattice value for // the "out" type of the node. 
- match &function.nodes[id.idx()] { + match node { Node::Start => { if inputs.len() != 0 { return Error(String::from("Start node must have zero inputs.")); @@ -405,7 +407,7 @@ fn typeflow( } if let Concrete(id) = inputs[1] { - if *id != function.return_type { + if *id != functions[function_id.idx()].return_type { return Error(String::from("Return node's data input type must be the same as the function's return type.")); } } else if inputs[1].is_error() { @@ -426,12 +428,12 @@ fn typeflow( return Error(String::from("Parameter node must have zero inputs.")); } - if *index >= function.param_types.len() { + if *index >= functions[function_id.idx()].param_types.len() { return Error(String::from("Parameter node must reference an index corresponding to an existing function argument.")); } // Type of parameter is stored directly in function. - let param_id = function.param_types[*index]; + let param_id = functions[function_id.idx()].param_types[*index]; Concrete(param_id) } -- GitLab From 3d2fa9297acc3ce31c046205f2f8a3ef2719dedc Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 17:47:18 -0500 Subject: [PATCH 058/105] Refactor typecheck API --- hercules_ir/src/dataflow.rs | 4 +- hercules_ir/src/typecheck.rs | 112 ++++++++++++++++++++--------------- 2 files changed, 65 insertions(+), 51 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 6a2b0e1d..2cc1e6ef 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -26,7 +26,7 @@ pub trait Semilattice: Eq { */ pub fn dataflow<L, F>( function: &Function, - reverse_post_order: &Vec<NodeID>, + reverse_postorder: &Vec<NodeID>, mut flow_function: F, ) -> Vec<L> where @@ -54,7 +54,7 @@ where let mut change = false; // Iterate nodes in reverse post order. - for node_id in reverse_post_order { + for node_id in reverse_postorder { // Assemble the "out" values of the predecessors of this node. 
This // vector's definition is hopefully LICMed out, so that we don't do // an allocation per node. This can't be done manually because of diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index ccd7441f..6e9bdf89 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -82,18 +82,20 @@ impl Semilattice for TypeSemilattice { } /* - * Top level typecheck function. + * Top level typecheck function. Typechecking is a module-wide operation. + * Returns a type for every node in every function. */ -pub fn typecheck( - function_id: FunctionID, - functions: &Vec<Function>, - types: &mut Vec<Type>, - constants: &Vec<Constant>, - dynamic_constants: &Vec<DynamicConstant>, - reverse_post_order: &Vec<NodeID>, -) -> Result<Vec<TypeID>, String> { +pub fn typecheck(module: &mut Module) -> Result<Vec<Vec<TypeID>>, String> { // Step 1: assemble a reverse type map. This is needed to get or create the - // ID of potentially new types. + // ID of potentially new types. Break down module into references to + // individual elements at this point, so that borrows don't overlap each + // other. + let Module { + ref functions, + ref mut types, + ref constants, + ref dynamic_constants, + } = module; let mut reverse_type_map: HashMap<Type, TypeID> = types .iter() .enumerate() @@ -103,24 +105,28 @@ pub fn typecheck( // Step 2: run dataflow. This is an occurrence of dataflow where the flow // function performs a non-associative operation on the predecessor "out" // values. - let result = dataflow( - &functions[function_id.idx()], - reverse_post_order, - |inputs, id| { - typeflow( - inputs, - id, - function_id, - functions, - types, - constants, - dynamic_constants, - &mut reverse_type_map, - ) - }, - ); - - // Step 3: add type for empty product. This is the type of the return node. 
+ let results: Vec<Vec<TypeSemilattice>> = functions + .iter() + .map(|function| { + let def_use_map = def_use(function); + let reverse_postorder = reverse_postorder(&def_use_map); + + dataflow(function, &reverse_postorder, |inputs, id| { + typeflow( + inputs, + id, + function, + functions, + types, + constants, + dynamic_constants, + &mut reverse_type_map, + ) + }) + }) + .collect(); + + // Step 3: add type for empty product. This is the type of return nodes. let empty_prod_ty = Type::Product(Box::new([])); let empty_prod_id = if let Some(id) = reverse_type_map.get(&empty_prod_ty) { *id @@ -131,24 +137,32 @@ pub fn typecheck( id }; - // Step 4: convert the individual type lattice values into a list of + // Step 4: convert the individual type lattice values into lists of // concrete type values, or a single error. - zip( - result.into_iter(), - functions[function_id.idx()].nodes.iter(), - ) - .map(|(x, n)| match x { - Unconstrained => Err(String::from("Found unconstrained type in program.")), - Concrete(id) => Ok(id), - Error(msg) => { - if n.is_return() && Error(msg.clone()) == TypeSemilattice::get_return_type_error() { - Ok(empty_prod_id) - } else { - Err(msg) - } - } - }) - .collect() + results + .into_iter() + .enumerate() + // For each type list, we want to convert its element TypeSemilattices + // into Result<TypeID, String>. + .map(|(function_idx, result): (usize, Vec<TypeSemilattice>)| { + zip(result.into_iter(), functions[function_idx].nodes.iter()) + // For each TypeSemilattice, convert into Result<TypeID, String>. 
+ .map(|(x, n): (TypeSemilattice, &Node)| match x { + Unconstrained => Err(String::from("Found unconstrained type in program.")), + Concrete(id) => Ok(id), + Error(msg) => { + if n.is_return() + && Error(msg.clone()) == TypeSemilattice::get_return_type_error() + { + Ok(empty_prod_id) + } else { + Err(msg.clone()) + } + } + }) + .collect() + }) + .collect() } /* @@ -157,7 +171,7 @@ pub fn typecheck( fn typeflow( inputs: &[&TypeSemilattice], node: &Node, - function_id: FunctionID, + function: &Function, functions: &Vec<Function>, types: &mut Vec<Type>, constants: &Vec<Constant>, @@ -407,7 +421,7 @@ fn typeflow( } if let Concrete(id) = inputs[1] { - if *id != functions[function_id.idx()].return_type { + if *id != function.return_type { return Error(String::from("Return node's data input type must be the same as the function's return type.")); } } else if inputs[1].is_error() { @@ -428,12 +442,12 @@ fn typeflow( return Error(String::from("Parameter node must have zero inputs.")); } - if *index >= functions[function_id.idx()].param_types.len() { + if *index >= function.param_types.len() { return Error(String::from("Parameter node must reference an index corresponding to an existing function argument.")); } // Type of parameter is stored directly in function. 
- let param_id = functions[function_id.idx()].param_types[*index]; + let param_id = function.param_types[*index]; Concrete(param_id) } -- GitLab From 0e77ecc9d771410703240aa7ade0d5598fc400b0 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 20 Sep 2023 18:09:46 -0500 Subject: [PATCH 059/105] Fixes --- hercules_ir/src/dataflow.rs | 25 +++++++++++-------------- hercules_ir/src/def_use.rs | 2 +- hercules_ir/src/typecheck.rs | 2 +- hercules_tools/src/hercules_dot/main.rs | 4 +++- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 2cc1e6ef..ce7ae232 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -33,23 +33,20 @@ where L: Semilattice, F: FnMut(&[&L], &Node) -> L, { - // Step 1: create initial set of "in" points. The start node is initialized - // to bottom, and everything else is initialized to top. - let ins: Vec<L> = (0..function.nodes.len()) - .map(|id| if id == 0 { L::bottom() } else { L::top() }) - .collect(); + // Step 1: compute NodeUses for each node in function. + let uses: Vec<NodeUses> = function.nodes.iter().map(|n| get_uses(n)).collect(); // Step 2: create initial set of "out" points. - let mut outs: Vec<L> = ins - .into_iter() - .enumerate() - .map(|(id, l)| flow_function(&[&l], &function.nodes[id])) + let mut outs: Vec<L> = (0..function.nodes.len()) + .map(|id| { + flow_function( + &vec![&(if id == 0 { L::bottom() } else { L::top() }); uses[id].as_ref().len()], + &function.nodes[id], + ) + }) .collect(); - // Step 3: compute NodeUses for each node in function. - let uses: Vec<NodeUses> = function.nodes.iter().map(|n| get_uses(n)).collect(); - - // Step 4: peform main dataflow loop. + // Step 3: peform main dataflow loop. loop { let mut change = false; @@ -82,7 +79,7 @@ where } } - // Step 5: return "out" set. + // Step 4: return "out" set. 
outs } diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 8fd0a2b2..0e750ca5 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -15,7 +15,7 @@ impl ImmutableDefUseMap { if id.idx() + 1 < self.first_edges.len() { self.first_edges[id.idx() + 1] - self.first_edges[id.idx()] } else { - self.first_edges.len() as u32 - self.first_edges[id.idx()] + self.users.len() as u32 - self.first_edges[id.idx()] } } diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 6e9bdf89..47b27a6d 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -588,7 +588,7 @@ fn typeflow( right: _, op, } => { - if inputs.len() != 1 { + if inputs.len() != 2 { return Error(String::from("Binary node must have exactly two inputs.")); } diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index 7db02a7b..21b6c3bb 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -26,8 +26,10 @@ fn main() { let mut contents = String::new(); file.read_to_string(&mut contents) .expect("PANIC: Unable to read input file contents."); - let module = + let mut module = hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); + let _types = hercules_ir::typecheck::typecheck(&mut module) + .expect("PANIC: Failed to typecheck Hercules IR module."); if args.output.is_empty() { let mut tmp_path = temp_dir(); tmp_path.push("hercules_dot.dot"); -- GitLab From 81bcbcaf6c96f93392be95bf672d9c4a92b4dbff Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 21 Sep 2023 14:17:45 -0500 Subject: [PATCH 060/105] Update DESIGN.md --- DESIGN.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index 9d5d7ddd..cd51e630 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -24,28 +24,36 @@ The Hercules' compiler is split into the following components: The IR of 
the Hercules compiler is similar to the sea of nodes IR presented in "A Simple Graph-Based Intermediate Representation", with a few differences. -- There are dynamic constants, which are constants provided dynamically to the runtime system - these can be used to specify array types, unlike input dependent values. -- There is no single global store. The closest analog are individual values with an array type, which support dynamic indexed read and write operations. +- There are dynamic constants, which are constants provided dynamically to the runtime system - these can be used to specify array type sizes, unlike normal runtime values. +- There is no single global store. The closest analog are individual values with an array type, which support dynamically indexed read and write operations. - There is no I/O, or other side effects. - There is no recursion. - The implementation of Hercules IR does not follow the original object oriented design. A key design consideration of Hercules IR is the absence of a concept of memory. A downside of this approach is that any language targetting Hecules IR must also be very restrictive regarding memory - in practice, this means tightly controlling or eliminating first-class references. The upside is that the compiler has complete freedom to layout data however it likes in memory when performing code generation. This includes deciding which data resides in which address spaces, which is a necessary ability for a compiler striving to have fine-grained control over what operations are computed on what devices. -In addition to not having a generalized memory, Hercules IR has no functionality for calling functions with side-effects, or doing IO. In other words, Hercules is a pure IR (it's not functional, as functions aren't first class values). This may be changed in the future - we could support effectful programs by giving call operators a control input and output edge. 
However, at least for now, we need to work with the simplest IR possible. +In addition to not having a generalized memory, Hercules IR has no functionality for calling functions with side-effects, or doing IO. In other words, Hercules is a pure IR (it's not functional, as functions aren't first class values). This may be changed in the future - we could support effectful programs by giving call operators a control input and output edge. However, at least for now, we need to work with the simplest IR possible, so the IR is pure. ### Optimizations +Hercules relies on other compiler infrastructures, such as LLVM, to do code generation for specific devices. Thus, Hercules itself doesn't perform particularly sophisticated optimizations. In general, the optimizations Hercules do are done to make partitioning easier. This includes things like GVN and peephole optimizations, which in general, make the IR "simpler". + TODO: @rarbore2 ### Partitioning +Partitioning is responsible for deciding which operations in the IR graph are executed on which devices. Additionally, operations are broken up into shards - every node in a shard executes on the same device, and the runtime system schedules execution at the shard level. Partitioning is conceptually very similar to instruction selection. Each shard can be thought of as a single instruction, and the device the shard is executed on can be thought of as the particular instruction being selected. In instruction selection, there is not only the choice of which instructions to use, but also how to partition the potentially many operations in the IR into a smaller number of target instructions. Similarly, partitioning Hercules IR must decide which operations are grouped together into the same shard, and for each shard, which device it should execute on. 
The set of operations each potential target device is capable of executing is crucial information when forming the shard boundaries, so this cannot be performed optimally as a sequential two step process. + TODO: @rarbore2 ### Code Generation Hercules uses LLVM for generating CPU and GPU code. Memory is "introduced" into the program representation at this stage. Operations in a function are separated into basic blocks. The data layout of values is decided on, and memory is allocated on the stack or is designated as separately allocated and passed into functions as necessary. Code is generated corresponding to possibly several estimates of dynamic constants. +TODO: @rarbore2 + ## Runtime System The runtime system is responsible for dynamically executing code generated by Hercules. It exposes a Rust API for executing Hercules code. It takes care of memory allocation, synchronization, and scheduling. + +TODO: @rarbore2 -- GitLab From bfb66d951f1440602a989a11ae0955b36b4a5d68 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 25 Sep 2023 16:56:19 -0500 Subject: [PATCH 061/105] Begin writing verification code --- hercules_ir/src/lib.rs | 2 ++ hercules_ir/src/typecheck.rs | 4 ++- hercules_ir/src/verify.rs | 34 +++++++++++++++++++++++++ hercules_tools/src/hercules_dot/main.rs | 2 +- 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 hercules_ir/src/verify.rs diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index df72712f..cd66d5fb 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -4,6 +4,7 @@ pub mod dot; pub mod ir; pub mod parse; pub mod typecheck; +pub mod verify; pub use crate::dataflow::*; pub use crate::def_use::*; @@ -11,3 +12,4 @@ pub use crate::dot::*; pub use crate::ir::*; pub use crate::parse::*; pub use crate::typecheck::*; +pub use crate::verify::*; diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 47b27a6d..0719fe9d 100644 --- 
a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -81,11 +81,13 @@ impl Semilattice for TypeSemilattice { } } +pub type ModuleTyping = Vec<Vec<TypeID>>; + /* * Top level typecheck function. Typechecking is a module-wide operation. * Returns a type for every node in every function. */ -pub fn typecheck(module: &mut Module) -> Result<Vec<Vec<TypeID>>, String> { +pub fn typecheck(module: &mut Module) -> Result<ModuleTyping, String> { // Step 1: assemble a reverse type map. This is needed to get or create the // ID of potentially new types. Break down module into references to // individual elements at this point, so that borrows don't overlap each diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs new file mode 100644 index 00000000..df8dad0e --- /dev/null +++ b/hercules_ir/src/verify.rs @@ -0,0 +1,34 @@ +use crate::*; + +/* + * Top level IR verification function. Verification runs passes that produce + * useful results (typing, dominator trees, etc.), so if verification succeeds, + * return those useful results. Otherwise, return the first error string found. + */ +pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { + let typing = typecheck(module)?; + for function in module.functions.iter() { + verify_structure(&function)?; + } + Ok(typing) +} + +/* + * There are structural constraints the IR must follow, such as all Phi nodes' + * control input must be a region node. This is where those properties are + * verified. 
+ */ +fn verify_structure(function: &Function) -> Result<(), String> { + for node in function.nodes.iter() { + match node { + Node::Phi { control, data: _ } => { + if let Node::Region { preds: _ } = function.nodes[control.idx()] { + } else { + Err("Phi node's control input must be a region node.")?; + } + } + _ => {} + }; + } + Ok(()) +} diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index 21b6c3bb..226f91db 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -28,7 +28,7 @@ fn main() { .expect("PANIC: Unable to read input file contents."); let mut module = hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); - let _types = hercules_ir::typecheck::typecheck(&mut module) + let _types = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to typecheck Hercules IR module."); if args.output.is_empty() { let mut tmp_path = temp_dir(); -- GitLab From ffe1630b40e4dddb0cdf343790680cd152489ddb Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 27 Sep 2023 17:00:37 -0500 Subject: [PATCH 062/105] Verify structure of if nodes --- hercules_ir/src/typecheck.rs | 21 ++++++++-------- hercules_ir/src/verify.rs | 49 ++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 0719fe9d..005a7e8d 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -1,8 +1,8 @@ -use crate::*; - use std::collections::HashMap; use std::iter::zip; +use crate::*; + use self::TypeSemilattice::*; /* @@ -84,10 +84,13 @@ impl Semilattice for TypeSemilattice { pub type ModuleTyping = Vec<Vec<TypeID>>; /* - * Top level typecheck function. Typechecking is a module-wide operation. + * Top level typecheck function. Typechecking is a module-wide analysis. * Returns a type for every node in every function. 
*/ -pub fn typecheck(module: &mut Module) -> Result<ModuleTyping, String> { +pub fn typecheck( + module: &mut Module, + reverse_postorders: &Vec<Vec<NodeID>>, +) -> Result<ModuleTyping, String> { // Step 1: assemble a reverse type map. This is needed to get or create the // ID of potentially new types. Break down module into references to // individual elements at this point, so that borrows don't overlap each @@ -107,13 +110,9 @@ pub fn typecheck(module: &mut Module) -> Result<ModuleTyping, String> { // Step 2: run dataflow. This is an occurrence of dataflow where the flow // function performs a non-associative operation on the predecessor "out" // values. - let results: Vec<Vec<TypeSemilattice>> = functions - .iter() - .map(|function| { - let def_use_map = def_use(function); - let reverse_postorder = reverse_postorder(&def_use_map); - - dataflow(function, &reverse_postorder, |inputs, id| { + let results: Vec<Vec<TypeSemilattice>> = zip(functions, reverse_postorders) + .map(|(function, reverse_postorder)| { + dataflow(function, reverse_postorder, |inputs, id| { typeflow( inputs, id, diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index df8dad0e..86b4c4f0 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -1,3 +1,5 @@ +use std::iter::zip; + use crate::*; /* @@ -6,9 +8,18 @@ use crate::*; * return those useful results. Otherwise, return the first error string found. 
*/ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { - let typing = typecheck(module)?; - for function in module.functions.iter() { - verify_structure(&function)?; + let def_uses: Vec<_> = module + .functions + .iter() + .map(|function| def_use(function)) + .collect(); + let reverse_postorders: Vec<_> = def_uses + .iter() + .map(|def_use| reverse_postorder(def_use)) + .collect(); + let typing = typecheck(module, &reverse_postorders)?; + for (function, def_use) in zip(module.functions.iter(), def_uses.iter()) { + verify_structure(function, def_use)?; } Ok(typing) } @@ -18,8 +29,8 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { * control input must be a region node. This is where those properties are * verified. */ -fn verify_structure(function: &Function) -> Result<(), String> { - for node in function.nodes.iter() { +fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result<(), String> { + for (idx, node) in function.nodes.iter().enumerate() { match node { Node::Phi { control, data: _ } => { if let Node::Region { preds: _ } = function.nodes[control.idx()] { @@ -27,6 +38,34 @@ fn verify_structure(function: &Function) -> Result<(), String> { Err("Phi node's control input must be a region node.")?; } } + Node::If { + control: _, + cond: _, + } => { + let users = def_use.get_users(NodeID::new(idx)); + if users.len() != 2 { + Err(format!("If node must have 2 users, not {}.", users.len()))?; + } + if let ( + Node::ReadProd { + prod: _, + index: index1, + }, + Node::ReadProd { + prod: _, + index: index2, + }, + ) = ( + &function.nodes[users[0].idx()], + &function.nodes[users[1].idx()], + ) { + if !((*index1 == 0 && *index2 == 1) || (*index1 == 1 && *index2 == 0)) { + Err("If node's user ReadProd nodes must reference different elements of If node's output product.")?; + } + } else { + Err("If node's users must both be ReadProd nodes.")?; + } + } _ => {} }; } -- GitLab From 
981abe2303c2c10294be37b7b24fbe85e00905d8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 27 Sep 2023 17:06:41 -0500 Subject: [PATCH 063/105] Check that return has 0 users in verify, not typecheck --- hercules_ir/src/typecheck.rs | 53 ++++++++---------------------------- hercules_ir/src/verify.rs | 13 ++++++++- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 005a7e8d..9b8b4ef9 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -23,15 +23,6 @@ impl TypeSemilattice { false } } - - // During typeflow, the return node is given an error type, even when - // typechecking succeeds. This is done so that any node that uses a return - // node will have its output type set to this error. In the top-level type - // checking function, we ignore this particular error if the node being - // checked is a return node. - fn get_return_type_error() -> Self { - Error(String::from("No node can take a return node as input.")) - } } impl PartialEq for TypeSemilattice { @@ -127,39 +118,20 @@ pub fn typecheck( }) .collect(); - // Step 3: add type for empty product. This is the type of return nodes. - let empty_prod_ty = Type::Product(Box::new([])); - let empty_prod_id = if let Some(id) = reverse_type_map.get(&empty_prod_ty) { - *id - } else { - let id = TypeID::new(reverse_type_map.len()); - reverse_type_map.insert(empty_prod_ty.clone(), id); - types.push(empty_prod_ty); - id - }; - - // Step 4: convert the individual type lattice values into lists of + // Step 3: convert the individual type lattice values into lists of // concrete type values, or a single error. results .into_iter() - .enumerate() // For each type list, we want to convert its element TypeSemilattices // into Result<TypeID, String>. 
- .map(|(function_idx, result): (usize, Vec<TypeSemilattice>)| { - zip(result.into_iter(), functions[function_idx].nodes.iter()) + .map(|result| { + result + .into_iter() // For each TypeSemilattice, convert into Result<TypeID, String>. - .map(|(x, n): (TypeSemilattice, &Node)| match x { + .map(|x| match x { Unconstrained => Err(String::from("Found unconstrained type in program.")), Concrete(id) => Ok(id), - Error(msg) => { - if n.is_return() - && Error(msg.clone()) == TypeSemilattice::get_return_type_error() - { - Ok(empty_prod_id) - } else { - Err(msg.clone()) - } - } + Error(msg) => Err(msg.clone()), }) .collect() }) @@ -429,14 +401,11 @@ fn typeflow( return inputs[1].clone(); } - // Return nodes are special - they cannot have any users. Thus, we - // set the return node's lattice value to a specific error. When - // converting lattice values to types, this particular error gets - // converted to an empty product type if it's the type of a return - // node. If any node uses a return node, it's lattice value will be - // this error. This will result in a normal error when attempting to - // extract conrete types. 
- TypeSemilattice::get_return_type_error() + Concrete(get_type_id( + Type::Product(Box::new([])), + types, + reverse_type_map, + )) } Node::Parameter { index } => { if inputs.len() != 0 { diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 86b4c4f0..3e940d2c 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -31,6 +31,7 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { */ fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result<(), String> { for (idx, node) in function.nodes.iter().enumerate() { + let users = def_use.get_users(NodeID::new(idx)); match node { Node::Phi { control, data: _ } => { if let Node::Region { preds: _ } = function.nodes[control.idx()] { @@ -42,7 +43,6 @@ fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result control: _, cond: _, } => { - let users = def_use.get_users(NodeID::new(idx)); if users.len() != 2 { Err(format!("If node must have 2 users, not {}.", users.len()))?; } @@ -66,6 +66,17 @@ fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result Err("If node's users must both be ReadProd nodes.")?; } } + Node::Return { + control: _, + value: _, + } => { + if users.len() != 0 { + Err(format!( + "Return node must have 0 users, not {}.", + users.len() + ))?; + } + } _ => {} }; } -- GitLab From 460065a9651acdfbb26848fd9c36e6f827e63e31 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 27 Sep 2023 17:22:04 -0500 Subject: [PATCH 064/105] Verify match structure --- hercules_ir/src/verify.rs | 54 ++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 3e940d2c..d12417f1 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -1,5 +1,9 @@ +extern crate bitvec; + use std::iter::zip; +use verify::bitvec::prelude::*; + use crate::*; /* @@ -18,8 +22,10 @@ pub fn 
verify(module: &mut Module) -> Result<ModuleTyping, String> { .map(|def_use| reverse_postorder(def_use)) .collect(); let typing = typecheck(module, &reverse_postorders)?; - for (function, def_use) in zip(module.functions.iter(), def_uses.iter()) { - verify_structure(function, def_use)?; + for (function, (def_use, typing)) in + zip(module.functions.iter(), zip(def_uses.iter(), typing.iter())) + { + verify_structure(function, def_use, typing, &module.types)?; } Ok(typing) } @@ -29,16 +35,15 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { * control input must be a region node. This is where those properties are * verified. */ -fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result<(), String> { +fn verify_structure( + function: &Function, + def_use: &ImmutableDefUseMap, + typing: &Vec<TypeID>, + types: &Vec<Type>, +) -> Result<(), String> { for (idx, node) in function.nodes.iter().enumerate() { let users = def_use.get_users(NodeID::new(idx)); match node { - Node::Phi { control, data: _ } => { - if let Node::Region { preds: _ } = function.nodes[control.idx()] { - } else { - Err("Phi node's control input must be a region node.")?; - } - } Node::If { control: _, cond: _, @@ -66,6 +71,12 @@ fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result Err("If node's users must both be ReadProd nodes.")?; } } + Node::Phi { control, data: _ } => { + if let Node::Region { preds: _ } = function.nodes[control.idx()] { + } else { + Err("Phi node's control input must be a region node.")?; + } + } Node::Return { control: _, value: _, @@ -77,6 +88,31 @@ fn verify_structure(function: &Function, def_use: &ImmutableDefUseMap) -> Result ))?; } } + Node::Match { control: _, sum } => { + let sum_ty = &types[typing[sum.idx()].idx()]; + if let Type::Summation(tys) = sum_ty { + let correct_number_of_users = tys.len(); + if users.len() != correct_number_of_users { + Err(format!( + "Match node must have {} users, not {}.", 
+ correct_number_of_users, + users.len() + ))?; + } + let mut users_covered = bitvec![u8, Lsb0; 0; users.len()]; + for user in users { + if let Node::ReadProd { prod: _, index } = function.nodes[user.idx()] { + assert!(index < users.len(), "ReadProd child of match node reads from bad index, but ran after typecheck succeeded."); + users_covered.set(index, true); + } + } + if users_covered.count_ones() != users.len() { + Err(format!("Match node's user ReadProd nodes must reference all {} elements of match node's output product, but they only reference {} of them.", users.len(), users_covered.count_ones()))?; + } + } else { + panic!("Type of match node's sum input is not a summation type, but ran after typecheck succeeded."); + } + } _ => {} }; } -- GitLab From 9e79332f5f589b1e97c14bf2051e090ff156463e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 27 Sep 2023 17:30:58 -0500 Subject: [PATCH 065/105] Verify fork/join structure, clean up code for getting op name --- hercules_ir/src/dot.rs | 98 +---------------- hercules_ir/src/ir.rs | 216 +++++++++++++++++++++++++++++++++++--- hercules_ir/src/verify.rs | 21 +++- 3 files changed, 223 insertions(+), 112 deletions(-) diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 16166120..dd85cc99 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -46,7 +46,7 @@ fn write_node<W: std::fmt::Write>( Ok((visited.get(&id).unwrap().clone(), visited)) } else { let node = &module.functions[i].nodes[j]; - let name = format!("{}_{}_{}", get_string_node_kind(node), i, j); + let name = format!("{}_{}_{}", node.lower_case_name(), i, j); visited.insert(NodeID::new(j), name.clone()); let visited = match node { Node::Start => { @@ -155,13 +155,13 @@ fn write_node<W: std::fmt::Write>( visited } Node::Unary { input, op } => { - write!(w, "{} [label=\"{}\"];\n", name, get_string_uop_kind(*op))?; + write!(w, "{} [label=\"{}\"];\n", name, op.lower_case_name())?; let (input_name, visited) = 
write_node(i, input.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"input\"];\n", input_name, name)?; visited } Node::Binary { left, right, op } => { - write!(w, "{} [label=\"{}\"];\n", name, get_string_bop_kind(*op))?; + write!(w, "{} [label=\"{}\"];\n", name, op.lower_case_name())?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; @@ -267,95 +267,3 @@ fn write_node<W: std::fmt::Write>( Ok((visited.get(&id).unwrap().clone(), visited)) } } - -fn get_string_node_kind(node: &Node) -> &'static str { - match node { - Node::Start => "start", - Node::Region { preds: _ } => "region", - Node::If { - control: _, - cond: _, - } => "if", - Node::Fork { - control: _, - factor: _, - } => "fork", - Node::Join { - control: _, - data: _, - } => "join", - Node::Phi { - control: _, - data: _, - } => "phi", - Node::Return { - control: _, - value: _, - } => "return", - Node::Parameter { index: _ } => "parameter", - Node::DynamicConstant { id: _ } => "dynamic_constant", - Node::Constant { id: _ } => "constant", - Node::Unary { input: _, op } => get_string_uop_kind(*op), - Node::Binary { - left: _, - right: _, - op, - } => get_string_bop_kind(*op), - Node::Call { - function: _, - dynamic_constants: _, - args: _, - } => "call", - Node::ReadProd { prod: _, index: _ } => "read_prod", - Node::WriteProd { - prod: _, - data: _, - index: _, - } => "write_prod ", - Node::ReadArray { array: _, index: _ } => "read_array", - Node::WriteArray { - array: _, - data: _, - index: _, - } => "write_array", - Node::Match { control: _, sum: _ } => "match", - Node::BuildSum { - data: _, - sum_ty: _, - variant: _, - } => "build_sum", - Node::ExtractSum { - data: _, - variant: _, - } => "extract_sum", - } -} - -fn get_string_uop_kind(uop: UnaryOperator) -> &'static str { - match uop { - UnaryOperator::Not => "not", - UnaryOperator::Neg => 
"neg", - UnaryOperator::Bitflip => "bitflip", - } -} - -fn get_string_bop_kind(bop: BinaryOperator) -> &'static str { - match bop { - BinaryOperator::Add => "add", - BinaryOperator::Sub => "sub", - BinaryOperator::Mul => "mul", - BinaryOperator::Div => "div", - BinaryOperator::Rem => "rem", - BinaryOperator::LT => "lt", - BinaryOperator::LTE => "lte", - BinaryOperator::GT => "gt", - BinaryOperator::GTE => "gte", - BinaryOperator::EQ => "eq", - BinaryOperator::NE => "ne", - BinaryOperator::Or => "or", - BinaryOperator::And => "and", - BinaryOperator::Xor => "xor", - BinaryOperator::LSh => "lsh", - BinaryOperator::RSh => "rsh", - } -} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 1abbb3f8..d07484a5 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -253,20 +253,6 @@ pub enum Node { }, } -impl Node { - pub fn is_return(&self) -> bool { - if let Node::Return { - control: _, - value: _, - } = self - { - true - } else { - false - } - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum UnaryOperator { Not, @@ -294,6 +280,208 @@ pub enum BinaryOperator { RSh, } +impl Node { + pub fn is_return(&self) -> bool { + if let Node::Return { + control: _, + value: _, + } = self + { + true + } else { + false + } + } + + pub fn upper_case_name(&self) -> &'static str { + match self { + Node::Start => "Start", + Node::Region { preds: _ } => "Region", + Node::If { + control: _, + cond: _, + } => "If", + Node::Fork { + control: _, + factor: _, + } => "Fork", + Node::Join { + control: _, + data: _, + } => "Join", + Node::Phi { + control: _, + data: _, + } => "Phi", + Node::Return { + control: _, + value: _, + } => "Return", + Node::Parameter { index: _ } => "Parameter", + Node::DynamicConstant { id: _ } => "DynamicConstant", + Node::Constant { id: _ } => "Constant", + Node::Unary { input: _, op } => op.upper_case_name(), + Node::Binary { + left: _, + right: _, + op, + } => op.upper_case_name(), + Node::Call { + function: _, + 
dynamic_constants: _, + args: _, + } => "Unary", + Node::ReadProd { prod: _, index: _ } => "ReadProd", + Node::WriteProd { + prod: _, + data: _, + index: _, + } => "WriteProd", + Node::ReadArray { array: _, index: _ } => "ReadArray", + Node::WriteArray { + array: _, + data: _, + index: _, + } => "WriteArray", + Node::Match { control: _, sum: _ } => "Match", + Node::BuildSum { + data: _, + sum_ty: _, + variant: _, + } => "BuildSum", + Node::ExtractSum { + data: _, + variant: _, + } => "ExtractSum", + } + } + + pub fn lower_case_name(&self) -> &'static str { + match self { + Node::Start => "start", + Node::Region { preds: _ } => "region", + Node::If { + control: _, + cond: _, + } => "if", + Node::Fork { + control: _, + factor: _, + } => "fork", + Node::Join { + control: _, + data: _, + } => "join", + Node::Phi { + control: _, + data: _, + } => "phi", + Node::Return { + control: _, + value: _, + } => "return", + Node::Parameter { index: _ } => "parameter", + Node::DynamicConstant { id: _ } => "dynamic_constant", + Node::Constant { id: _ } => "constant", + Node::Unary { input: _, op } => op.lower_case_name(), + Node::Binary { + left: _, + right: _, + op, + } => op.lower_case_name(), + Node::Call { + function: _, + dynamic_constants: _, + args: _, + } => "call", + Node::ReadProd { prod: _, index: _ } => "read_prod", + Node::WriteProd { + prod: _, + data: _, + index: _, + } => "write_prod ", + Node::ReadArray { array: _, index: _ } => "read_array", + Node::WriteArray { + array: _, + data: _, + index: _, + } => "write_array", + Node::Match { control: _, sum: _ } => "match", + Node::BuildSum { + data: _, + sum_ty: _, + variant: _, + } => "build_sum", + Node::ExtractSum { + data: _, + variant: _, + } => "extract_sum", + } + } +} + +impl UnaryOperator { + pub fn upper_case_name(&self) -> &'static str { + match self { + UnaryOperator::Not => "Not", + UnaryOperator::Neg => "Neg", + UnaryOperator::Bitflip => "Bitflip", + } + } + + pub fn lower_case_name(&self) -> &'static str { 
+ match self { + UnaryOperator::Not => "not", + UnaryOperator::Neg => "neg", + UnaryOperator::Bitflip => "bitflip", + } + } +} + +impl BinaryOperator { + pub fn upper_case_name(&self) -> &'static str { + match self { + BinaryOperator::Add => "Add", + BinaryOperator::Sub => "Sub", + BinaryOperator::Mul => "Mul", + BinaryOperator::Div => "Div", + BinaryOperator::Rem => "Rem", + BinaryOperator::LT => "LT", + BinaryOperator::LTE => "LTE", + BinaryOperator::GT => "GT", + BinaryOperator::GTE => "GTE", + BinaryOperator::EQ => "EQ", + BinaryOperator::NE => "NE", + BinaryOperator::Or => "Or", + BinaryOperator::And => "And", + BinaryOperator::Xor => "Xor", + BinaryOperator::LSh => "LSh", + BinaryOperator::RSh => "RSh", + } + } + + pub fn lower_case_name(&self) -> &'static str { + match self { + BinaryOperator::Add => "add", + BinaryOperator::Sub => "sub", + BinaryOperator::Mul => "mul", + BinaryOperator::Div => "div", + BinaryOperator::Rem => "rem", + BinaryOperator::LT => "lt", + BinaryOperator::LTE => "lte", + BinaryOperator::GT => "gt", + BinaryOperator::GTE => "gte", + BinaryOperator::EQ => "eq", + BinaryOperator::NE => "ne", + BinaryOperator::Or => "or", + BinaryOperator::And => "and", + BinaryOperator::Xor => "xor", + BinaryOperator::LSh => "lsh", + BinaryOperator::RSh => "rsh", + } + } +} + /* * Rust things to make newtyped IDs usable. 
*/ diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index d12417f1..0d6ca621 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -47,9 +47,21 @@ fn verify_structure( Node::If { control: _, cond: _, + } + | Node::Fork { + control: _, + factor: _, + } + | Node::Join { + control: _, + data: _, } => { if users.len() != 2 { - Err(format!("If node must have 2 users, not {}.", users.len()))?; + Err(format!( + "{} node must have 2 users, not {}.", + node.upper_case_name(), + users.len() + ))?; } if let ( Node::ReadProd { @@ -65,10 +77,13 @@ fn verify_structure( &function.nodes[users[1].idx()], ) { if !((*index1 == 0 && *index2 == 1) || (*index1 == 1 && *index2 == 0)) { - Err("If node's user ReadProd nodes must reference different elements of If node's output product.")?; + Err(format!("{} node's user ReadProd nodes must reference different elements of output product.", node.upper_case_name()))?; } } else { - Err("If node's users must both be ReadProd nodes.")?; + Err(format!( + "{} node's users must both be ReadProd nodes.", + node.upper_case_name() + ))?; } } Node::Phi { control, data: _ } => { -- GitLab From 7e2687358a361a3f0085973d2cc7a60e6a632758 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 27 Sep 2023 17:34:27 -0500 Subject: [PATCH 066/105] Comments --- hercules_ir/src/verify.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 0d6ca621..a737aafe 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -21,7 +21,11 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { .iter() .map(|def_use| reverse_postorder(def_use)) .collect(); + + // Typecheck the module. let typing = typecheck(module, &reverse_postorders)?; + + // Check the structure of the functions in the module. 
for (function, (def_use, typing)) in zip(module.functions.iter(), zip(def_uses.iter(), typing.iter())) { @@ -44,6 +48,9 @@ fn verify_structure( for (idx, node) in function.nodes.iter().enumerate() { let users = def_use.get_users(NodeID::new(idx)); match node { + // If, fork, and join nodes all have the same structural + // constraints - each must have exactly two ReadProd users, which + // reference differing elements of the node's output product. Node::If { control: _, cond: _, @@ -86,12 +93,14 @@ fn verify_structure( ))?; } } + // Phi nodes must depend on a region node. Node::Phi { control, data: _ } => { if let Node::Region { preds: _ } = function.nodes[control.idx()] { } else { Err("Phi node's control input must be a region node.")?; } } + // Return nodes must have no users. Node::Return { control: _, value: _, @@ -103,6 +112,8 @@ fn verify_structure( ))?; } } + // Match nodes are similar to if nodes, but have a variable number + // of ReadProd users, corresponding to the sum type being matched. Node::Match { control: _, sum } => { let sum_ty = &types[typing[sum.idx()].idx()]; if let Type::Summation(tys) = sum_ty { -- GitLab From d80d1cf9173adcd3879ca2b47abc01d990532fde Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 28 Sep 2023 19:43:58 -0500 Subject: [PATCH 067/105] Rename dataflow to forward_dataflow --- hercules_ir/src/dataflow.rs | 18 +++++++++--------- hercules_ir/src/typecheck.rs | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index ce7ae232..9dfa6867 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -15,16 +15,16 @@ pub trait Semilattice: Eq { } /* - * Top level dataflow function. This routine is slightly more generic than the - * typical textbook definition. The flow function takes an ordered slice of - * predecessor lattice values, rather than a single lattice value. 
Thus, the - * flow function can perform non-associative and non-commutative operations on - * the "in" lattice values. This makes this routine more useful for some - * analyses, such as typechecking. To perform the typical behavior, the flow - * function should start by meeting the input lattice values into a single - * lattice value. + * Top level forward dataflow function. This routine is slightly more generic + * than the typical textbook definition. The flow function takes an ordered + * slice of predecessor lattice values, rather than a single lattice value. + * Thus, the flow function can perform non-associative and non-commutative + * operations on the "in" lattice values. This makes this routine more useful + * for some analyses, such as typechecking. To perform the typical behavior, + * the flow function should start by meeting the input lattice values into a + * single lattice value. */ -pub fn dataflow<L, F>( +pub fn forward_dataflow<L, F>( function: &Function, reverse_postorder: &Vec<NodeID>, mut flow_function: F, diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 9b8b4ef9..4bfa9fae 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -103,7 +103,7 @@ pub fn typecheck( // values. 
let results: Vec<Vec<TypeSemilattice>> = zip(functions, reverse_postorders) .map(|(function, reverse_postorder)| { - dataflow(function, reverse_postorder, |inputs, id| { + forward_dataflow(function, reverse_postorder, |inputs, id| { typeflow( inputs, id, -- GitLab From 8d0b589015a68f900a75f5de14876443d38768d3 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 29 Sep 2023 14:42:42 -0500 Subject: [PATCH 068/105] Check that compound types containing control types aren't built --- hercules_ir/src/typecheck.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 4bfa9fae..cab2f250 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -702,6 +702,10 @@ fn typeflow( } else if let Concrete(data_id) = inputs[1] { if elem_tys[*index] != *data_id { return Error(format!("WriteProd node's data input doesn't match the type of the element at index {} inside the product type.", index)); + } else if let Type::Control(_) = &types[data_id.idx()] { + return Error(String::from( + "WriteProd node's data input cannot have a control type.", + )); } } else if inputs[1].is_error() { // If an input lattice value is an error, we must @@ -773,6 +777,10 @@ fn typeflow( if let Concrete(data_id) = inputs[1] { if elem_id != *data_id { return Error(String::from("WriteArray node's array and data inputs must have compatible types (type of data input must be the same as the array input's element type).")); + } else if let Type::Control(_) = &types[data_id.idx()] { + return Error(String::from( + "WriteArray node's data input cannot have a control type.", + )); } } } else { @@ -831,6 +839,12 @@ fn typeflow( } if let Concrete(id) = inputs[0] { + if let Type::Control(_) = &types[id.idx()] { + return Error(String::from( + "BuildSum node's data input cannot have a control type.", + )); + } + // BuildSum node stores its own result type. 
if let Type::Summation(variants) = &types[sum_ty.idx()] { // Must reference an existing variant. -- GitLab From 0c6a85cbb910763833de6f1e393cadc8cd978091 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Fri, 29 Sep 2023 17:12:48 -0500 Subject: [PATCH 069/105] Dom skeleton --- hercules_ir/src/dom.rs | 17 +++++++++++++++++ hercules_ir/src/lib.rs | 2 ++ 2 files changed, 19 insertions(+) create mode 100644 hercules_ir/src/dom.rs diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs new file mode 100644 index 00000000..45415af1 --- /dev/null +++ b/hercules_ir/src/dom.rs @@ -0,0 +1,17 @@ +use crate::*; + +/* + * Custom type for storing a dominator tree. For each node except the start + * node, store its immediate dominator. + */ +#[derive(Debug, Clone)] +pub struct DomTree { + immediate_dominator: Vec<NodeID>, +} + +/* + * Top level function for calculating dominator trees. + */ +pub fn dominator(function: &Function) -> DomTree { + todo!() +} diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index cd66d5fb..8046498a 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -1,5 +1,6 @@ pub mod dataflow; pub mod def_use; +pub mod dom; pub mod dot; pub mod ir; pub mod parse; @@ -8,6 +9,7 @@ pub mod verify; pub use crate::dataflow::*; pub use crate::def_use::*; +pub use crate::dom::*; pub use crate::dot::*; pub use crate::ir::*; pub use crate::parse::*; -- GitLab From 7fcf37e5261e53ae6653b7551b4a6e6e8aee0211 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 30 Sep 2023 17:10:13 -0500 Subject: [PATCH 070/105] Domtree impl --- hercules_ir/src/dom.rs | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 45415af1..20feaf78 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -1,12 +1,39 @@ use crate::*; +use std::collections::HashMap; + /* - * Custom type for storing a dominator tree. 
For each node except the start - * node, store its immediate dominator. + * Custom type for storing a dominator tree. For each control node, store its + * immediate dominator. */ #[derive(Debug, Clone)] pub struct DomTree { - immediate_dominator: Vec<NodeID>, + imm_doms: HashMap<NodeID, NodeID>, +} + +impl DomTree { + pub fn imm_dom(&self, x: NodeID) -> Option<NodeID> { + self.imm_doms.get(&x).map(|x| x.clone()) + } + + pub fn does_imm_dom(&self, a: NodeID, b: NodeID) -> bool { + self.imm_dom(b) == Some(a) + } + + pub fn does_dom(&self, a: NodeID, b: NodeID) -> bool { + let mut iter = Some(b); + while let Some(b) = iter { + if b == a { + return true; + } + iter = self.imm_dom(b); + } + false + } + + pub fn does_prop_dom(&self, a: NodeID, b: NodeID) -> bool { + a != b && self.does_dom(a, b) + } } /* -- GitLab From b81974a88b2d63cfb5ac96325a8262c521656f0e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 30 Sep 2023 17:36:45 -0500 Subject: [PATCH 071/105] Compute sub-cfg for IR --- hercules_ir/src/dom.rs | 83 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 20feaf78..a6c5fb32 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -22,6 +22,8 @@ impl DomTree { pub fn does_dom(&self, a: NodeID, b: NodeID) -> bool { let mut iter = Some(b); + + // Go up dominator tree until finding a, or root of tree. while let Some(b) = iter { if b == a { return true; @@ -40,5 +42,86 @@ impl DomTree { * Top level function for calculating dominator trees. */ pub fn dominator(function: &Function) -> DomTree { + // Step 1: compute the sub-CFG for the function. This is the graph the + // dominator tree will be built for. + let sub_cfg = control_nodes(function); + todo!() } + +/* + * Enum for storing control uses of a node. Calculated alongside control nodes + * in control_nodes. 
+ */ +#[derive(Debug, Clone)] +pub enum ControlUses<'a> { + Zero, + One([NodeID; 1]), + Variable(&'a Box<[NodeID]>), +} + +impl<'a> AsRef<[NodeID]> for ControlUses<'a> { + fn as_ref(&self) -> &[NodeID] { + match self { + ControlUses::Zero => &[], + ControlUses::One(x) => x, + ControlUses::Variable(x) => x, + } + } +} + +pub type SubCFG<'a> = Vec<(NodeID, ControlUses<'a>)>; + +/* + * Top level function for getting all the control nodes in a function. Also + * returns the control uses of each control node, in effect returning the + * control subset of the IR graph. + */ +pub fn control_nodes(function: &Function) -> SubCFG { + use Node::*; + + let mut control_nodes = vec![]; + for (idx, node) in function.nodes.iter().enumerate() { + match node { + Start => { + control_nodes.push((NodeID::new(idx), ControlUses::Zero)); + } + Region { preds } => { + control_nodes.push((NodeID::new(idx), ControlUses::Variable(&preds))); + } + If { control, cond: _ } + | Fork { control, factor: _ } + | Join { control, data: _ } + | Return { control, value: _ } + | Match { control, sum: _ } => { + control_nodes.push((NodeID::new(idx), ControlUses::One([*control]))); + } + ReadProd { prod, index } => match function.nodes[prod.idx()] { + // ReadProd nodes are control nodes if their predecessor is a + // legal control node, and if it's the right index. 
+ Match { control: _, sum: _ } => { + control_nodes.push((NodeID::new(idx), ControlUses::One([*prod]))); + } + If { + control: _, + cond: _, + } + | Fork { + control: _, + factor: _, + } + | Join { + control: _, + data: _, + } => { + if *index == 0 { + control_nodes.push((NodeID::new(idx), ControlUses::One([*prod]))) + } + } + _ => {} + }, + _ => {} + } + } + control_nodes +} -- GitLab From 58530e9f9bf99806bfc5ec40bf127e2981d7aa76 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 30 Sep 2023 18:24:02 -0500 Subject: [PATCH 072/105] Preorder DFS of sub CFG --- hercules_ir/src/dom.rs | 91 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 10 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index a6c5fb32..61dfc06c 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -1,3 +1,7 @@ +extern crate bitvec; + +use dom::bitvec::prelude::*; + use crate::*; use std::collections::HashMap; @@ -39,12 +43,17 @@ impl DomTree { } /* - * Top level function for calculating dominator trees. + * Top level function for calculating dominator trees. Uses the semi-NCA + * algorithm, as described in "Finding Dominators in Practice". */ pub fn dominator(function: &Function) -> DomTree { // Step 1: compute the sub-CFG for the function. This is the graph the // dominator tree will be built for. - let sub_cfg = control_nodes(function); + let backward_sub_cfg = control_nodes(function); + let forward_sub_cfg = reorient_sub_cfg(&backward_sub_cfg); + + // Step 2: compute pre-order DFS of CFG. + let preorder = preorder(&forward_sub_cfg); todo!() } @@ -70,37 +79,37 @@ impl<'a> AsRef<[NodeID]> for ControlUses<'a> { } } -pub type SubCFG<'a> = Vec<(NodeID, ControlUses<'a>)>; +pub type BackwardSubCFG<'a> = HashMap<NodeID, ControlUses<'a>>; /* * Top level function for getting all the control nodes in a function. 
Also * returns the control uses of each control node, in effect returning the * control subset of the IR graph. */ -pub fn control_nodes(function: &Function) -> SubCFG { +pub fn control_nodes(function: &Function) -> BackwardSubCFG { use Node::*; - let mut control_nodes = vec![]; + let mut control_nodes = HashMap::new(); for (idx, node) in function.nodes.iter().enumerate() { match node { Start => { - control_nodes.push((NodeID::new(idx), ControlUses::Zero)); + control_nodes.insert(NodeID::new(idx), ControlUses::Zero); } Region { preds } => { - control_nodes.push((NodeID::new(idx), ControlUses::Variable(&preds))); + control_nodes.insert(NodeID::new(idx), ControlUses::Variable(&preds)); } If { control, cond: _ } | Fork { control, factor: _ } | Join { control, data: _ } | Return { control, value: _ } | Match { control, sum: _ } => { - control_nodes.push((NodeID::new(idx), ControlUses::One([*control]))); + control_nodes.insert(NodeID::new(idx), ControlUses::One([*control])); } ReadProd { prod, index } => match function.nodes[prod.idx()] { // ReadProd nodes are control nodes if their predecessor is a // legal control node, and if it's the right index. Match { control: _, sum: _ } => { - control_nodes.push((NodeID::new(idx), ControlUses::One([*prod]))); + control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); } If { control: _, @@ -115,7 +124,7 @@ pub fn control_nodes(function: &Function) -> SubCFG { data: _, } => { if *index == 0 { - control_nodes.push((NodeID::new(idx), ControlUses::One([*prod]))) + control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); } } _ => {} @@ -125,3 +134,65 @@ pub fn control_nodes(function: &Function) -> SubCFG { } control_nodes } + +pub type ForwardSubCFG = HashMap<NodeID, Vec<NodeID>>; + +/* + * Utility for getting def-use edges of sub CFG. 
+ */ +pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { + let mut forward = HashMap::new(); + + // HashMap doesn't have a get_mut_or_insert like API, so explicitly insert + // all possible keys (all control nodes). + for key in backward.keys() { + forward.insert(*key, vec![]); + } + + // Then, insert def-use edges. Unwrap since all keys are initialized above + // with empty vectors. + for (user, defs) in backward.iter() { + for def in defs.as_ref() { + forward.get_mut(def).unwrap().push(*user); + } + } + + forward +} + +fn preorder(forward_sub_cfg: &ForwardSubCFG) -> Vec<NodeID> { + // Initialize order vector and bitset for tracking which nodes have been + // visited. + let order = Vec::with_capacity(forward_sub_cfg.len()); + let visited = bitvec![u8, Lsb0; 0; forward_sub_cfg.len()]; + + // Order and visited are threaded through arguments / return pair of + // reverse_postorder_helper for ownership reasons. + let (order, _) = preorder_helper(NodeID::new(0), forward_sub_cfg, order, visited); + order +} + +fn preorder_helper( + node: NodeID, + forward_sub_cfg: &ForwardSubCFG, + mut order: Vec<NodeID>, + mut visited: BitVec<u8, Lsb0>, +) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { + if visited[node.idx()] { + // If already visited, return early. + (order, visited) + } else { + // Set visited to true. + visited.set(node.idx(), true); + + // Iterate over users. + for user in forward_sub_cfg.get(&node).unwrap() { + (order, visited) = preorder_helper(*user, forward_sub_cfg, order, visited); + } + + // Before iterating users, push this node. 
+ order.push(node); + + (order, visited) + } +} -- GitLab From 39748224df3d9c16e888dfb87d010d9dfafa83c6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sat, 30 Sep 2023 18:27:39 -0500 Subject: [PATCH 073/105] Fix --- hercules_ir/src/dom.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 61dfc06c..9947e2cd 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -185,14 +185,14 @@ fn preorder_helper( // Set visited to true. visited.set(node.idx(), true); + // Before iterating users, push this node. + order.push(node); + // Iterate over users. for user in forward_sub_cfg.get(&node).unwrap() { (order, visited) = preorder_helper(*user, forward_sub_cfg, order, visited); } - // Before iterating users, push this node. - order.push(node); - (order, visited) } } -- GitLab From 0c785fc1af590bd977b239216409bb08e3216294 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 1 Oct 2023 10:25:17 -0500 Subject: [PATCH 074/105] Fix --- hercules_ir/src/dom.rs | 21 +++++++++++++-------- hercules_ir/src/verify.rs | 6 ++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 9947e2cd..03b40898 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -108,14 +108,14 @@ pub fn control_nodes(function: &Function) -> BackwardSubCFG { ReadProd { prod, index } => match function.nodes[prod.idx()] { // ReadProd nodes are control nodes if their predecessor is a // legal control node, and if it's the right index. 
- Match { control: _, sum: _ } => { - control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); - } - If { + Match { control: _, sum: _ } + | If { control: _, cond: _, + } => { + control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); } - | Fork { + Fork { control: _, factor: _, } @@ -143,8 +143,9 @@ pub type ForwardSubCFG = HashMap<NodeID, Vec<NodeID>>; pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { let mut forward = HashMap::new(); - // HashMap doesn't have a get_mut_or_insert like API, so explicitly insert - // all possible keys (all control nodes). + // Every control node needs to be a key in forward, even if it has no + // def-use edges originating from it (the return node), so explicitly add + // them all here. for key in backward.keys() { forward.insert(*key, vec![]); } @@ -164,7 +165,10 @@ fn preorder(forward_sub_cfg: &ForwardSubCFG) -> Vec<NodeID> { // Initialize order vector and bitset for tracking which nodes have been // visited. let order = Vec::with_capacity(forward_sub_cfg.len()); - let visited = bitvec![u8, Lsb0; 0; forward_sub_cfg.len()]; + + // Visited is indexed by node ID, so find the largest possible node ID + // visited during the traversal. + let visited = bitvec![u8, Lsb0; 0; forward_sub_cfg.keys().map(|x| x.idx()).fold(std::usize::MIN, |a,b| a.max(b)) + 1]; // Order and visited are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. @@ -178,6 +182,7 @@ fn preorder_helper( mut order: Vec<NodeID>, mut visited: BitVec<u8, Lsb0>, ) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { + assert!(forward_sub_cfg.contains_key(&node)); if visited[node.idx()] { // If already visited, return early. 
(order, visited) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index a737aafe..383dd3d1 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -31,6 +31,12 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { { verify_structure(function, def_use, typing, &module.types)?; } + + // Check SSA, fork, and join dominance relations. + for function in module.functions.iter() { + let dom = dominator(&function); + } + Ok(typing) } -- GitLab From 950462a496f8c2d9c2bb5d9bc25c889742ced2ef Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 1 Oct 2023 10:52:54 -0500 Subject: [PATCH 075/105] Keep track of parents in DFS --- hercules_ir/src/dom.rs | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 03b40898..3a046d2a 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -1,7 +1,5 @@ extern crate bitvec; -use dom::bitvec::prelude::*; - use crate::*; use std::collections::HashMap; @@ -162,17 +160,16 @@ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { } fn preorder(forward_sub_cfg: &ForwardSubCFG) -> Vec<NodeID> { - // Initialize order vector and bitset for tracking which nodes have been - // visited. + // Initialize order vector and visited hashmap for tracking which nodes have + // been visited. let order = Vec::with_capacity(forward_sub_cfg.len()); - // Visited is indexed by node ID, so find the largest possible node ID - // visited during the traversal. - let visited = bitvec![u8, Lsb0; 0; forward_sub_cfg.keys().map(|x| x.idx()).fold(std::usize::MIN, |a,b| a.max(b)) + 1]; + // Explicitly keep track of parents in DFS tree. Doubles as a visited set. 
+ let parents = HashMap::new(); - // Order and visited are threaded through arguments / return pair of + // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. - let (order, _) = preorder_helper(NodeID::new(0), forward_sub_cfg, order, visited); + let (order, _) = preorder_helper(NodeID::new(0), forward_sub_cfg, order, parents); order } @@ -180,24 +177,29 @@ fn preorder_helper( node: NodeID, forward_sub_cfg: &ForwardSubCFG, mut order: Vec<NodeID>, - mut visited: BitVec<u8, Lsb0>, -) -> (Vec<NodeID>, BitVec<u8, Lsb0>) { + mut parents: HashMap<NodeID, NodeID>, +) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { assert!(forward_sub_cfg.contains_key(&node)); - if visited[node.idx()] { + if parents.contains_key(&node) { // If already visited, return early. - (order, visited) + (order, parents) } else { - // Set visited to true. - visited.set(node.idx(), true); + // Keep track of DFS parent for region nodes. + if let Some(parent) = order.last() { + // Only node where the above isn't true is the start node, which + // has no incoming edge. Thus, there's no need to insert the start + // node into the parents map for tracking visitation. + parents.insert(node, *parent); + } // Before iterating users, push this node. order.push(node); // Iterate over users. 
for user in forward_sub_cfg.get(&node).unwrap() { - (order, visited) = preorder_helper(*user, forward_sub_cfg, order, visited); + (order, parents) = preorder_helper(*user, forward_sub_cfg, order, parents); } - (order, visited) + (order, parents) } } -- GitLab From 404c0cf550ad2936e0c7b3248d2cf7b3225dd334 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 11:24:13 -0500 Subject: [PATCH 076/105] Calculate semidominators, incorrectly --- hercules_ir/src/dom.rs | 96 +++++++++++++++++++++++++++++++++++++++--- samples/simple1.hir | 12 +----- 2 files changed, 91 insertions(+), 17 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 3a046d2a..4c94f004 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -10,12 +10,12 @@ use std::collections::HashMap; */ #[derive(Debug, Clone)] pub struct DomTree { - imm_doms: HashMap<NodeID, NodeID>, + idom: HashMap<NodeID, NodeID>, } impl DomTree { pub fn imm_dom(&self, x: NodeID) -> Option<NodeID> { - self.imm_doms.get(&x).map(|x| x.clone()) + self.idom.get(&x).map(|x| x.clone()) } pub fn does_imm_dom(&self, a: NodeID, b: NodeID) -> bool { @@ -51,9 +51,92 @@ pub fn dominator(function: &Function) -> DomTree { let forward_sub_cfg = reorient_sub_cfg(&backward_sub_cfg); // Step 2: compute pre-order DFS of CFG. - let preorder = preorder(&forward_sub_cfg); + let (preorder, mut parents) = preorder(&forward_sub_cfg); + let mut node_numbers = HashMap::new(); + for (number, node) in preorder.iter().enumerate() { + node_numbers.insert(node, number); + } + + // Step 3: define eval, which will be used to compute semi-dominators. 
+ let mut eval_stack = vec![]; + let mut labels: Vec<_> = (0..preorder.len()).collect(); + let mut eval = |v, last_linked, mut parents: HashMap<NodeID, NodeID>, semi: Vec<NodeID>| { + let p_v = &parents[v]; + let p_v_n = node_numbers[p_v]; + if p_v_n < last_linked { + return (labels[p_v_n], parents, semi); + } + + // Get ancestors of v, except for the virtual root. + assert!(eval_stack.is_empty()); + let mut iter = *v; + let mut p_iter = parents[v]; + loop { + eval_stack.push(iter); + iter = p_iter; + p_iter = parents[&iter]; + if node_numbers[&p_iter] < last_linked { + break; + } + } + + let old_parents = parents.clone(); + // Perform path compression. + let mut iter_label_number = labels[node_numbers[&iter]]; + for node in eval_stack.drain(..).rev() { + *parents.get_mut(&node).unwrap() = parents[&iter]; + let node_label_number = labels[node_numbers[&node]]; + if node_numbers[&semi[iter_label_number]] < node_numbers[&semi[node_label_number]] { + labels[node_numbers[&node]] = labels[node_numbers[&iter]] + } else { + iter_label_number = node_label_number; + } + iter = node; + } + println!("{:?}", parents); + println!("{:?}", old_parents); + println!(""); - todo!() + return (labels[node_numbers[&iter]], parents, semi); + }; + + // Step 4: initialize idom. + let mut idom = HashMap::new(); + for w in preorder[1..].iter() { + // Each idom starts as the parent node. + idom.insert(w, parents[w]); + } + + // Step 5: compute semi-dominators. This implementation is based off of + // LLVM's dominator implementation. 
+ let mut semi = vec![NodeID::new(0); preorder.len()]; + for w_n in (2..preorder.len()).rev() { + let w = preorder[w_n]; + semi[w_n] = parents[&w]; + for v in backward_sub_cfg[&w].as_ref() { + println!("Hello!"); + let (new_semi_index, new_parents, new_semi) = eval(v, w_n + 1, parents, semi); + parents = new_parents; + semi = new_semi; + let new_semi_node = semi[new_semi_index]; + if node_numbers[&new_semi_node] < node_numbers[&semi[w_n]] { + semi[w_n] = new_semi_node; + } + } + } + + println!("{:?}", semi); + println!("{:?}", preorder); + println!( + "{:?}", + preorder + .iter() + .map(|id| function.nodes[id.idx()].upper_case_name()) + .collect::<Vec<_>>() + ); + DomTree { + idom: HashMap::new(), + } } /* @@ -159,7 +242,7 @@ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { forward } -fn preorder(forward_sub_cfg: &ForwardSubCFG) -> Vec<NodeID> { +fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { // Initialize order vector and visited hashmap for tracking which nodes have // been visited. let order = Vec::with_capacity(forward_sub_cfg.len()); @@ -169,8 +252,7 @@ fn preorder(forward_sub_cfg: &ForwardSubCFG) -> Vec<NodeID> { // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. 
- let (order, _) = preorder_helper(NodeID::new(0), forward_sub_cfg, order, parents); - order + preorder_helper(NodeID::new(0), forward_sub_cfg, order, parents) } fn preorder_helper( diff --git a/samples/simple1.hir b/samples/simple1.hir index 415b2bc3..229a79bc 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -1,11 +1,3 @@ fn myfunc(x: i32) -> i32 - y = call<5>(add, x, x) - r = return(start, y) - -fn add<1>(x: i32, y: i32) -> i32 - c = constant(i8, 5) - dc = dynamic_constant(#0) - r = return(start, s) - w = add(z, c) - s = add(w, dc) - z = add(x, y) \ No newline at end of file + a = region(start) + b = return(a, x) -- GitLab From ffacbda4eedfab0fd8b0c9380badece8bb5a3ec7 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 12:58:31 -0500 Subject: [PATCH 077/105] Fix semi-dom calc --- hercules_ir/src/dom.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 4c94f004..45a4888e 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -80,7 +80,6 @@ pub fn dominator(function: &Function) -> DomTree { } } - let old_parents = parents.clone(); // Perform path compression. 
let mut iter_label_number = labels[node_numbers[&iter]]; for node in eval_stack.drain(..).rev() { @@ -93,9 +92,6 @@ pub fn dominator(function: &Function) -> DomTree { } iter = node; } - println!("{:?}", parents); - println!("{:?}", old_parents); - println!(""); return (labels[node_numbers[&iter]], parents, semi); }; @@ -113,9 +109,8 @@ pub fn dominator(function: &Function) -> DomTree { for w_n in (2..preorder.len()).rev() { let w = preorder[w_n]; semi[w_n] = parents[&w]; - for v in backward_sub_cfg[&w].as_ref() { - println!("Hello!"); - let (new_semi_index, new_parents, new_semi) = eval(v, w_n + 1, parents, semi); + for v in forward_sub_cfg[&w].iter() { + let (new_semi_index, new_parents, new_semi) = eval(&v, w_n + 1, parents, semi); parents = new_parents; semi = new_semi; let new_semi_node = semi[new_semi_index]; @@ -125,18 +120,7 @@ pub fn dominator(function: &Function) -> DomTree { } } - println!("{:?}", semi); - println!("{:?}", preorder); - println!( - "{:?}", - preorder - .iter() - .map(|id| function.nodes[id.idx()].upper_case_name()) - .collect::<Vec<_>>() - ); - DomTree { - idom: HashMap::new(), - } + todo!() } /* -- GitLab From 33c4235088fe0e62b28981368167a6de9dd0a38e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 13:19:54 -0500 Subject: [PATCH 078/105] Print node ID in dot, incorrect idoms --- hercules_ir/src/dom.rs | 29 ++++++++++++----- hercules_ir/src/dot.rs | 74 ++++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 29 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 45a4888e..a11f9587 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -96,14 +96,7 @@ pub fn dominator(function: &Function) -> DomTree { return (labels[node_numbers[&iter]], parents, semi); }; - // Step 4: initialize idom. - let mut idom = HashMap::new(); - for w in preorder[1..].iter() { - // Each idom starts as the parent node. 
- idom.insert(w, parents[w]); - } - - // Step 5: compute semi-dominators. This implementation is based off of + // Step 4: compute semi-dominators. This implementation is based off of // LLVM's dominator implementation. let mut semi = vec![NodeID::new(0); preorder.len()]; for w_n in (2..preorder.len()).rev() { @@ -120,7 +113,25 @@ pub fn dominator(function: &Function) -> DomTree { } } - todo!() + // Step 5: compute idom. + let mut idom = HashMap::new(); + for w in preorder[1..].iter() { + // Each idom starts as the parent node. + idom.insert(*w, parents[w]); + } + for w_n in 2..preorder.len() { + let w = preorder[w_n]; + let semi_num = node_numbers[&semi[w_n]]; + let mut w_idom_candidate = idom[&w]; + while node_numbers[&w_idom_candidate] > semi_num { + w_idom_candidate = idom[&w_idom_candidate]; + } + *idom.get_mut(&w).unwrap() = w_idom_candidate; + } + + println!("{:?}", idom); + + DomTree { idom } } /* diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index dd85cc99..8623ca5e 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -50,11 +50,11 @@ fn write_node<W: std::fmt::Write>( visited.insert(NodeID::new(j), name.clone()); let visited = match node { Node::Start => { - write!(w, "{} [label=\"start\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"start\"];\n", name, j)?; visited } Node::Region { preds } => { - write!(w, "{} [label=\"region\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"region\"];\n", name, j)?; for (idx, pred) in preds.iter().enumerate() { let (pred_name, tmp_visited) = write_node(i, pred.idx(), module, visited, w)?; visited = tmp_visited; @@ -67,7 +67,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::If { control, cond } => { - write!(w, "{} [label=\"if\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"if\"];\n", name, j)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (cond_name, visited) = write_node(i, cond.idx(), module, visited, w)?; write!( @@ -81,8 +81,9 @@ 
fn write_node<W: std::fmt::Write>( Node::Fork { control, factor } => { write!( w, - "{} [label=\"fork<{:?}>\"];\n", + "{} [xlabel={}, label=\"fork<{:?}>\"];\n", name, + j, module.dynamic_constants[factor.idx()] )?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; @@ -94,7 +95,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::Join { control, data } => { - write!(w, "{} [label=\"join\"];\n", name,)?; + write!(w, "{} [xlabel={}, label=\"join\"];\n", name, j)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!( @@ -106,7 +107,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::Phi { control, data } => { - write!(w, "{} [label=\"phi\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"phi\"];\n", name, j)?; let (control_name, mut visited) = write_node(i, control.idx(), module, visited, w)?; write!( w, @@ -123,7 +124,7 @@ fn write_node<W: std::fmt::Write>( Node::Return { control, value } => { let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; let (value_name, visited) = write_node(i, value.idx(), module, visited, w)?; - write!(w, "{} [label=\"return\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"return\"];\n", name, j)?; write!( w, "{} -> {} [label=\"control\", style=\"dashed\"];\n", @@ -133,14 +134,21 @@ fn write_node<W: std::fmt::Write>( visited } Node::Parameter { index } => { - write!(w, "{} [label=\"param #{}\"];\n", name, index + 1)?; + write!( + w, + "{} [xlabel={}, label=\"param #{}\"];\n", + name, + j, + index + 1 + )?; visited } Node::Constant { id } => { write!( w, - "{} [label=\"{:?}\"];\n", + "{} [xlabel={}, label=\"{:?}\"];\n", name, + j, module.constants[id.idx()] )?; visited @@ -148,20 +156,33 @@ fn write_node<W: std::fmt::Write>( Node::DynamicConstant { id } => { write!( w, - "{} [label=\"dynamic_constant({:?})\"];\n", + "{} [xlabel={}, 
label=\"dynamic_constant({:?})\"];\n", name, + j, module.dynamic_constants[id.idx()] )?; visited } Node::Unary { input, op } => { - write!(w, "{} [label=\"{}\"];\n", name, op.lower_case_name())?; + write!( + w, + "{} [xlabel={}, label=\"{}\"];\n", + name, + j, + op.lower_case_name() + )?; let (input_name, visited) = write_node(i, input.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"input\"];\n", input_name, name)?; visited } Node::Binary { left, right, op } => { - write!(w, "{} [label=\"{}\"];\n", name, op.lower_case_name())?; + write!( + w, + "{} [xlabel={}, label=\"{}\"];\n", + name, + j, + op.lower_case_name() + )?; let (left_name, visited) = write_node(i, left.idx(), module, visited, w)?; let (right_name, visited) = write_node(i, right.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"left\"];\n", left_name, name)?; @@ -173,7 +194,7 @@ fn write_node<W: std::fmt::Write>( dynamic_constants, args, } => { - write!(w, "{} [label=\"call<", name,)?; + write!(w, "{} [xlabel={}, label=\"call<", name, j)?; for (idx, id) in dynamic_constants.iter().enumerate() { let dc = &module.dynamic_constants[id.idx()]; if idx == 0 { @@ -198,13 +219,21 @@ fn write_node<W: std::fmt::Write>( visited } Node::ReadProd { prod, index } => { - write!(w, "{} [label=\"read_prod({})\"];\n", name, index)?; + write!( + w, + "{} [xlabel={}, label=\"read_prod({})\"];\n", + name, j, index + )?; let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"prod\"];\n", prod_name, name)?; visited } Node::WriteProd { prod, data, index } => { - write!(w, "{} [label=\"write_prod({})\"];\n", name, index)?; + write!( + w, + "{} [xlabel={}, label=\"write_prod({})\"];\n", + name, j, index + )?; let (prod_name, visited) = write_node(i, prod.idx(), module, visited, w)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"prod\"];\n", prod_name, name)?; @@ -212,7 +241,7 @@ fn write_node<W: 
std::fmt::Write>( visited } Node::ReadArray { array, index } => { - write!(w, "{} [label=\"read_array\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"read_array\"];\n", name, j)?; let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; let (index_name, visited) = write_node(i, index.idx(), module, visited, w)?; @@ -220,7 +249,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::WriteArray { array, data, index } => { - write!(w, "{} [label=\"write_array\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"write_array\"];\n", name, j)?; let (array_name, visited) = write_node(i, array.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"array\"];\n", array_name, name)?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; @@ -230,7 +259,7 @@ fn write_node<W: std::fmt::Write>( visited } Node::Match { control, sum } => { - write!(w, "{} [label=\"match\"];\n", name)?; + write!(w, "{} [xlabel={}, label=\"match\"];\n", name, j)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; write!( w, @@ -248,8 +277,9 @@ fn write_node<W: std::fmt::Write>( } => { write!( w, - "{} [label=\"build_sum({:?}, {})\"];\n", + "{} [xlabel={}, label=\"build_sum({:?}, {})\"];\n", name, + j, module.types[sum_ty.idx()], variant )?; @@ -258,7 +288,11 @@ fn write_node<W: std::fmt::Write>( visited } Node::ExtractSum { data, variant } => { - write!(w, "{} [label=\"extract_sum({})\"];\n", name, variant)?; + write!( + w, + "{} [xlabel={}, label=\"extract_sum({})\"];\n", + name, j, variant + )?; let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited -- GitLab From e8584b6b7ea914e2789821ec8eb02af24c9bb234 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 13:27:19 -0500 Subject: [PATCH 079/105] One idom fix --- hercules_ir/src/dom.rs | 
10 +++++----- samples/simple1.hir | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index a11f9587..6f714b03 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -56,6 +56,11 @@ pub fn dominator(function: &Function) -> DomTree { for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); } + let mut idom = HashMap::new(); + for w in preorder[1..].iter() { + // Each idom starts as the parent node. + idom.insert(*w, parents[w]); + } // Step 3: define eval, which will be used to compute semi-dominators. let mut eval_stack = vec![]; @@ -114,11 +119,6 @@ pub fn dominator(function: &Function) -> DomTree { } // Step 5: compute idom. - let mut idom = HashMap::new(); - for w in preorder[1..].iter() { - // Each idom starts as the parent node. - idom.insert(*w, parents[w]); - } for w_n in 2..preorder.len() { let w = preorder[w_n]; let semi_num = node_numbers[&semi[w_n]]; diff --git a/samples/simple1.hir b/samples/simple1.hir index 229a79bc..92c1435b 100644 --- a/samples/simple1.hir +++ b/samples/simple1.hir @@ -1,3 +1,5 @@ fn myfunc(x: i32) -> i32 a = region(start) - b = return(a, x) + b = region(start) + c = region(a, b) + d = return(c, x) -- GitLab From 19dad274273ef7e966bc27723c225a54ad5fdf34 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 13:40:46 -0500 Subject: [PATCH 080/105] Fix preorder dfs --- hercules_ir/src/dom.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 6f714b03..1ec5c6d7 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -118,6 +118,13 @@ pub fn dominator(function: &Function) -> DomTree { } } + println!( + "{:?}", + (0..preorder.len()) + .map(|idx| (preorder[idx], semi[idx])) + .collect::<HashMap<_, _>>() + ); + // Step 5: compute idom. 
for w_n in 2..preorder.len() { let w = preorder[w_n]; @@ -247,11 +254,12 @@ fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, HashMap<NodeID, No // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. - preorder_helper(NodeID::new(0), forward_sub_cfg, order, parents) + preorder_helper(NodeID::new(0), None, forward_sub_cfg, order, parents) } fn preorder_helper( node: NodeID, + parent: Option<NodeID>, forward_sub_cfg: &ForwardSubCFG, mut order: Vec<NodeID>, mut parents: HashMap<NodeID, NodeID>, @@ -262,11 +270,11 @@ fn preorder_helper( (order, parents) } else { // Keep track of DFS parent for region nodes. - if let Some(parent) = order.last() { + if let Some(parent) = parent { // Only node where the above isn't true is the start node, which // has no incoming edge. Thus, there's no need to insert the start // node into the parents map for tracking visitation. - parents.insert(node, *parent); + parents.insert(node, parent); } // Before iterating users, push this node. @@ -274,7 +282,7 @@ fn preorder_helper( // Iterate over users. 
for user in forward_sub_cfg.get(&node).unwrap() { - (order, parents) = preorder_helper(*user, forward_sub_cfg, order, parents); + (order, parents) = preorder_helper(*user, Some(node), forward_sub_cfg, order, parents); } (order, parents) -- GitLab From 43061d7219fc634e5224972086196ac6ef2bf60e Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 18:47:34 -0500 Subject: [PATCH 081/105] Fix one issue --- hercules_ir/src/dom.rs | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 1ec5c6d7..542282fa 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -56,6 +56,12 @@ pub fn dominator(function: &Function) -> DomTree { for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); } + parents.insert(NodeID::new(0), NodeID::new(0)); + println!("Backward: {:?}", backward_sub_cfg); + println!("Forward: {:?}", forward_sub_cfg); + println!("Preorder: {:?}", preorder); + println!("Parents: {:?}", parents); + println!("Node Numbers: {:?}", node_numbers); let mut idom = HashMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. @@ -75,30 +81,29 @@ pub fn dominator(function: &Function) -> DomTree { // Get ancestors of v, except for the virtual root. assert!(eval_stack.is_empty()); let mut iter = *v; - let mut p_iter = parents[v]; loop { - eval_stack.push(iter); - iter = p_iter; - p_iter = parents[&iter]; - if node_numbers[&p_iter] < last_linked { + eval_stack.push(node_numbers[&iter]); + iter = parents[&iter]; + if node_numbers[&parents[&iter]] < last_linked { break; } } // Perform path compression. 
- let mut iter_label_number = labels[node_numbers[&iter]]; - for node in eval_stack.drain(..).rev() { - *parents.get_mut(&node).unwrap() = parents[&iter]; - let node_label_number = labels[node_numbers[&node]]; - if node_numbers[&semi[iter_label_number]] < node_numbers[&semi[node_label_number]] { - labels[node_numbers[&node]] = labels[node_numbers[&iter]] + let mut p_number = node_numbers[&iter]; + let mut p_label_number = labels[node_numbers[&iter]]; + for number in eval_stack.drain(..).rev() { + *parents.get_mut(&preorder[number]).unwrap() = parents[&preorder[p_number]]; + let label_number = labels[number]; + if node_numbers[&semi[p_label_number]] < node_numbers[&semi[label_number]] { + labels[number] = labels[p_number] } else { - iter_label_number = node_label_number; + p_label_number = label_number; } - iter = node; + p_number = number; } - return (labels[node_numbers[&iter]], parents, semi); + return (labels[p_number], parents, semi); }; // Step 4: compute semi-dominators. This implementation is based off of @@ -107,7 +112,7 @@ pub fn dominator(function: &Function) -> DomTree { for w_n in (2..preorder.len()).rev() { let w = preorder[w_n]; semi[w_n] = parents[&w]; - for v in forward_sub_cfg[&w].iter() { + for v in backward_sub_cfg[&w].as_ref() { let (new_semi_index, new_parents, new_semi) = eval(&v, w_n + 1, parents, semi); parents = new_parents; semi = new_semi; @@ -119,7 +124,7 @@ pub fn dominator(function: &Function) -> DomTree { } println!( - "{:?}", + "Semi-dominators: {:?}", (0..preorder.len()) .map(|idx| (preorder[idx], semi[idx])) .collect::<HashMap<_, _>>() @@ -136,7 +141,7 @@ pub fn dominator(function: &Function) -> DomTree { *idom.get_mut(&w).unwrap() = w_idom_candidate; } - println!("{:?}", idom); + println!("Immediate Dominators: {:?}", idom); DomTree { idom } } -- GitLab From 4241dd3bddf2e199a6f8cf16ab7aef37dd341e35 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 19:11:35 -0500 Subject: [PATCH 082/105] 
Debugging --- hercules_ir/src/dom.rs | 57 +++++++++++++++++++++++++++++++----------- hercules_ir/src/ir.rs | 10 ++++---- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 542282fa..1d1a07ad 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -2,7 +2,7 @@ extern crate bitvec; use crate::*; -use std::collections::HashMap; +use std::collections::BTreeMap; /* * Custom type for storing a dominator tree. For each control node, store its @@ -10,7 +10,7 @@ use std::collections::HashMap; */ #[derive(Debug, Clone)] pub struct DomTree { - idom: HashMap<NodeID, NodeID>, + idom: BTreeMap<NodeID, NodeID>, } impl DomTree { @@ -52,7 +52,7 @@ pub fn dominator(function: &Function) -> DomTree { // Step 2: compute pre-order DFS of CFG. let (preorder, mut parents) = preorder(&forward_sub_cfg); - let mut node_numbers = HashMap::new(); + let mut node_numbers = BTreeMap::new(); for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); } @@ -62,7 +62,7 @@ pub fn dominator(function: &Function) -> DomTree { println!("Preorder: {:?}", preorder); println!("Parents: {:?}", parents); println!("Node Numbers: {:?}", node_numbers); - let mut idom = HashMap::new(); + let mut idom = BTreeMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. idom.insert(*w, parents[w]); @@ -71,7 +71,8 @@ pub fn dominator(function: &Function) -> DomTree { // Step 3: define eval, which will be used to compute semi-dominators. 
let mut eval_stack = vec![]; let mut labels: Vec<_> = (0..preorder.len()).collect(); - let mut eval = |v, last_linked, mut parents: HashMap<NodeID, NodeID>, semi: Vec<NodeID>| { + let mut eval = |v, last_linked, mut parents: BTreeMap<NodeID, NodeID>, semi: Vec<NodeID>| { + println!("Labels: {:?}", labels); let p_v = &parents[v]; let p_v_n = node_numbers[p_v]; if p_v_n < last_linked { @@ -109,25 +110,51 @@ pub fn dominator(function: &Function) -> DomTree { // Step 4: compute semi-dominators. This implementation is based off of // LLVM's dominator implementation. let mut semi = vec![NodeID::new(0); preorder.len()]; + println!(""); for w_n in (2..preorder.len()).rev() { let w = preorder[w_n]; semi[w_n] = parents[&w]; + println!("w: {:?} w_n: {:?}", w, w_n); + println!( + "Semis: {:?}", + (0..preorder.len()) + .map(|idx| (preorder[idx].idx(), semi[idx].idx())) + .collect::<BTreeMap<_, _>>() + ); for v in backward_sub_cfg[&w].as_ref() { let (new_semi_index, new_parents, new_semi) = eval(&v, w_n + 1, parents, semi); parents = new_parents; semi = new_semi; let new_semi_node = semi[new_semi_index]; + let old_semi_node = semi[w_n]; if node_numbers[&new_semi_node] < node_numbers[&semi[w_n]] { semi[w_n] = new_semi_node; } + println!( + "Semis: {:?} Node: {:?} new_index: {:?} new_node: {:?} old_node: {:?}", + (0..preorder.len()) + .map(|idx| (preorder[idx].idx(), semi[idx].idx())) + .collect::<BTreeMap<_, _>>(), + v, + new_semi_index, + new_semi_node, + old_semi_node, + ); } + println!( + "Semis: {:?}", + (0..preorder.len()) + .map(|idx| (preorder[idx].idx(), semi[idx].idx())) + .collect::<BTreeMap<_, _>>() + ); + println!(""); } println!( - "Semi-dominators: {:?}", + "Semis: {:?}", (0..preorder.len()) .map(|idx| (preorder[idx], semi[idx])) - .collect::<HashMap<_, _>>() + .collect::<BTreeMap<_, _>>() ); // Step 5: compute idom. 
@@ -167,7 +194,7 @@ impl<'a> AsRef<[NodeID]> for ControlUses<'a> { } } -pub type BackwardSubCFG<'a> = HashMap<NodeID, ControlUses<'a>>; +pub type BackwardSubCFG<'a> = BTreeMap<NodeID, ControlUses<'a>>; /* * Top level function for getting all the control nodes in a function. Also @@ -177,7 +204,7 @@ pub type BackwardSubCFG<'a> = HashMap<NodeID, ControlUses<'a>>; pub fn control_nodes(function: &Function) -> BackwardSubCFG { use Node::*; - let mut control_nodes = HashMap::new(); + let mut control_nodes = BTreeMap::new(); for (idx, node) in function.nodes.iter().enumerate() { match node { Start => { @@ -223,13 +250,13 @@ pub fn control_nodes(function: &Function) -> BackwardSubCFG { control_nodes } -pub type ForwardSubCFG = HashMap<NodeID, Vec<NodeID>>; +pub type ForwardSubCFG = BTreeMap<NodeID, Vec<NodeID>>; /* * Utility for getting def-use edges of sub CFG. */ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { - let mut forward = HashMap::new(); + let mut forward = BTreeMap::new(); // Every control node needs to be a key in forward, even if it has no // def-use edges originating from it (the return node), so explicitly add @@ -249,13 +276,13 @@ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { forward } -fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { +fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, BTreeMap<NodeID, NodeID>) { // Initialize order vector and visited hashmap for tracking which nodes have // been visited. let order = Vec::with_capacity(forward_sub_cfg.len()); // Explicitly keep track of parents in DFS tree. Doubles as a visited set. - let parents = HashMap::new(); + let parents = BTreeMap::new(); // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. 
@@ -267,8 +294,8 @@ fn preorder_helper( parent: Option<NodeID>, forward_sub_cfg: &ForwardSubCFG, mut order: Vec<NodeID>, - mut parents: HashMap<NodeID, NodeID>, -) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { + mut parents: BTreeMap<NodeID, NodeID>, +) -> (Vec<NodeID>, BTreeMap<NodeID, NodeID>) { assert!(forward_sub_cfg.contains_key(&node)); if parents.contains_key(&node) { // If already visited, return early. diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index d07484a5..2c943865 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -485,7 +485,7 @@ impl BinaryOperator { /* * Rust things to make newtyped IDs usable. */ -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct FunctionID(u32); impl FunctionID { @@ -498,7 +498,7 @@ impl FunctionID { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct NodeID(u32); impl NodeID { @@ -511,7 +511,7 @@ impl NodeID { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ConstantID(u32); impl ConstantID { @@ -524,7 +524,7 @@ impl ConstantID { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct TypeID(u32); impl TypeID { @@ -537,7 +537,7 @@ impl TypeID { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct DynamicConstantID(u32); impl DynamicConstantID { -- GitLab From 8c37334a8574266677595ac0c8fd992df0d46ee6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 19:25:49 -0500 Subject: [PATCH 083/105] Rework --- hercules_ir/src/dom.rs | 108 ++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 71 deletions(-) diff --git a/hercules_ir/src/dom.rs 
b/hercules_ir/src/dom.rs index 1d1a07ad..77d5817f 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -68,83 +68,52 @@ pub fn dominator(function: &Function) -> DomTree { idom.insert(*w, parents[w]); } - // Step 3: define eval, which will be used to compute semi-dominators. - let mut eval_stack = vec![]; + // Step 3: define snca_compress, which will be used to compute semi- + // dominators, and initialize various variables. + let mut semi = vec![0; preorder.len()]; let mut labels: Vec<_> = (0..preorder.len()).collect(); - let mut eval = |v, last_linked, mut parents: BTreeMap<NodeID, NodeID>, semi: Vec<NodeID>| { - println!("Labels: {:?}", labels); - let p_v = &parents[v]; - let p_v_n = node_numbers[p_v]; - if p_v_n < last_linked { - return (labels[p_v_n], parents, semi); - } - - // Get ancestors of v, except for the virtual root. - assert!(eval_stack.is_empty()); - let mut iter = *v; - loop { - eval_stack.push(node_numbers[&iter]); - iter = parents[&iter]; - if node_numbers[&parents[&iter]] < last_linked { - break; + let mut ancestors = vec![0; preorder.len()]; + fn snca_compress( + v_n: usize, + mut ancestors: Vec<usize>, + mut labels: Vec<usize>, + ) -> (Vec<usize>, Vec<usize>) { + let u_n = ancestors[v_n]; + + if u_n != 0 { + (ancestors, labels) = snca_compress(u_n, ancestors, labels); + if labels[u_n] < labels[v_n] { + labels[v_n] = labels[u_n]; } + ancestors[v_n] = ancestors[u_n]; } - // Perform path compression. 
- let mut p_number = node_numbers[&iter]; - let mut p_label_number = labels[node_numbers[&iter]]; - for number in eval_stack.drain(..).rev() { - *parents.get_mut(&preorder[number]).unwrap() = parents[&preorder[p_number]]; - let label_number = labels[number]; - if node_numbers[&semi[p_label_number]] < node_numbers[&semi[label_number]] { - labels[number] = labels[p_number] - } else { - p_label_number = label_number; - } - p_number = number; - } + (ancestors, labels) + } - return (labels[p_number], parents, semi); - }; - - // Step 4: compute semi-dominators. This implementation is based off of - // LLVM's dominator implementation. - let mut semi = vec![NodeID::new(0); preorder.len()]; - println!(""); - for w_n in (2..preorder.len()).rev() { - let w = preorder[w_n]; - semi[w_n] = parents[&w]; - println!("w: {:?} w_n: {:?}", w, w_n); + // Step 4: compute semi-dominators. + for w_n in (1..preorder.len()).rev() { + println!("w: {:?} w_n: {:?}", preorder[w_n], w_n); println!( "Semis: {:?}", (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), semi[idx].idx())) + .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) .collect::<BTreeMap<_, _>>() ); - for v in backward_sub_cfg[&w].as_ref() { - let (new_semi_index, new_parents, new_semi) = eval(&v, w_n + 1, parents, semi); - parents = new_parents; - semi = new_semi; - let new_semi_node = semi[new_semi_index]; - let old_semi_node = semi[w_n]; - if node_numbers[&new_semi_node] < node_numbers[&semi[w_n]] { - semi[w_n] = new_semi_node; - } - println!( - "Semis: {:?} Node: {:?} new_index: {:?} new_node: {:?} old_node: {:?}", - (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), semi[idx].idx())) - .collect::<BTreeMap<_, _>>(), - v, - new_semi_index, - new_semi_node, - old_semi_node, - ); + + semi[w_n] = w_n; + for v in backward_sub_cfg[&preorder[w_n]].as_ref() { + let v_n = node_numbers[&v]; + (ancestors, labels) = snca_compress(v_n, ancestors, labels); + semi[w_n] = std::cmp::min(semi[w_n], labels[v_n]); } + 
labels[w_n] = semi[w_n]; + ancestors[w_n] = node_numbers[&parents[&preorder[w_n]]]; + println!( "Semis: {:?}", (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), semi[idx].idx())) + .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) .collect::<BTreeMap<_, _>>() ); println!(""); @@ -153,19 +122,16 @@ pub fn dominator(function: &Function) -> DomTree { println!( "Semis: {:?}", (0..preorder.len()) - .map(|idx| (preorder[idx], semi[idx])) + .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) .collect::<BTreeMap<_, _>>() ); // Step 5: compute idom. - for w_n in 2..preorder.len() { - let w = preorder[w_n]; - let semi_num = node_numbers[&semi[w_n]]; - let mut w_idom_candidate = idom[&w]; - while node_numbers[&w_idom_candidate] > semi_num { - w_idom_candidate = idom[&w_idom_candidate]; + for v_n in 1..preorder.len() { + let v = preorder[v_n]; + while node_numbers[&idom[&v]] > semi[v_n] { + *idom.get_mut(&v).unwrap() = idom[&idom[&v]]; } - *idom.get_mut(&w).unwrap() = w_idom_candidate; } println!("Immediate Dominators: {:?}", idom); -- GitLab From 23f4b59f6f10ba9eeefe941d972fa3613b268010 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 2 Oct 2023 19:27:41 -0500 Subject: [PATCH 084/105] Delete prints --- hercules_ir/src/dom.rs | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 77d5817f..acf5e764 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -57,11 +57,6 @@ pub fn dominator(function: &Function) -> DomTree { node_numbers.insert(node, number); } parents.insert(NodeID::new(0), NodeID::new(0)); - println!("Backward: {:?}", backward_sub_cfg); - println!("Forward: {:?}", forward_sub_cfg); - println!("Preorder: {:?}", preorder); - println!("Parents: {:?}", parents); - println!("Node Numbers: {:?}", node_numbers); let mut idom = BTreeMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. 
@@ -93,14 +88,6 @@ pub fn dominator(function: &Function) -> DomTree { // Step 4: compute semi-dominators. for w_n in (1..preorder.len()).rev() { - println!("w: {:?} w_n: {:?}", preorder[w_n], w_n); - println!( - "Semis: {:?}", - (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) - .collect::<BTreeMap<_, _>>() - ); - semi[w_n] = w_n; for v in backward_sub_cfg[&preorder[w_n]].as_ref() { let v_n = node_numbers[&v]; @@ -109,23 +96,8 @@ pub fn dominator(function: &Function) -> DomTree { } labels[w_n] = semi[w_n]; ancestors[w_n] = node_numbers[&parents[&preorder[w_n]]]; - - println!( - "Semis: {:?}", - (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) - .collect::<BTreeMap<_, _>>() - ); - println!(""); } - println!( - "Semis: {:?}", - (0..preorder.len()) - .map(|idx| (preorder[idx].idx(), preorder[semi[idx]].idx())) - .collect::<BTreeMap<_, _>>() - ); - // Step 5: compute idom. for v_n in 1..preorder.len() { let v = preorder[v_n]; @@ -134,8 +106,6 @@ pub fn dominator(function: &Function) -> DomTree { } } - println!("Immediate Dominators: {:?}", idom); - DomTree { idom } } -- GitLab From 0880bbc2cc9a86ec694815ca4bfbdf2336eb3712 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 3 Oct 2023 13:24:14 -0500 Subject: [PATCH 085/105] Switch back to hashmap --- hercules_ir/src/dom.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index acf5e764..8a53b116 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -2,7 +2,7 @@ extern crate bitvec; use crate::*; -use std::collections::BTreeMap; +use std::collections::HashMap; /* * Custom type for storing a dominator tree. 
For each control node, store its @@ -10,7 +10,7 @@ use std::collections::BTreeMap; */ #[derive(Debug, Clone)] pub struct DomTree { - idom: BTreeMap<NodeID, NodeID>, + idom: HashMap<NodeID, NodeID>, } impl DomTree { @@ -52,12 +52,12 @@ pub fn dominator(function: &Function) -> DomTree { // Step 2: compute pre-order DFS of CFG. let (preorder, mut parents) = preorder(&forward_sub_cfg); - let mut node_numbers = BTreeMap::new(); + let mut node_numbers = HashMap::new(); for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); } parents.insert(NodeID::new(0), NodeID::new(0)); - let mut idom = BTreeMap::new(); + let mut idom = HashMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. idom.insert(*w, parents[w]); @@ -130,7 +130,7 @@ impl<'a> AsRef<[NodeID]> for ControlUses<'a> { } } -pub type BackwardSubCFG<'a> = BTreeMap<NodeID, ControlUses<'a>>; +pub type BackwardSubCFG<'a> = HashMap<NodeID, ControlUses<'a>>; /* * Top level function for getting all the control nodes in a function. Also @@ -140,7 +140,7 @@ pub type BackwardSubCFG<'a> = BTreeMap<NodeID, ControlUses<'a>>; pub fn control_nodes(function: &Function) -> BackwardSubCFG { use Node::*; - let mut control_nodes = BTreeMap::new(); + let mut control_nodes = HashMap::new(); for (idx, node) in function.nodes.iter().enumerate() { match node { Start => { @@ -186,13 +186,13 @@ pub fn control_nodes(function: &Function) -> BackwardSubCFG { control_nodes } -pub type ForwardSubCFG = BTreeMap<NodeID, Vec<NodeID>>; +pub type ForwardSubCFG = HashMap<NodeID, Vec<NodeID>>; /* * Utility for getting def-use edges of sub CFG. 
*/ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { - let mut forward = BTreeMap::new(); + let mut forward = HashMap::new(); // Every control node needs to be a key in forward, even if it has no // def-use edges originating from it (the return node), so explicitly add @@ -212,13 +212,13 @@ pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { forward } -fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, BTreeMap<NodeID, NodeID>) { +fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { // Initialize order vector and visited hashmap for tracking which nodes have // been visited. let order = Vec::with_capacity(forward_sub_cfg.len()); // Explicitly keep track of parents in DFS tree. Doubles as a visited set. - let parents = BTreeMap::new(); + let parents = HashMap::new(); // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. @@ -230,8 +230,8 @@ fn preorder_helper( parent: Option<NodeID>, forward_sub_cfg: &ForwardSubCFG, mut order: Vec<NodeID>, - mut parents: BTreeMap<NodeID, NodeID>, -) -> (Vec<NodeID>, BTreeMap<NodeID, NodeID>) { + mut parents: HashMap<NodeID, NodeID>, +) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { assert!(forward_sub_cfg.contains_key(&node)); if parents.contains_key(&node) { // If already visited, return early. 
-- GitLab From 37b0e1c90c794e0592179fccf8f0f213f8b5dfd8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 3 Oct 2023 13:51:58 -0500 Subject: [PATCH 086/105] Generic subgraph code --- hercules_ir/src/lib.rs | 2 + hercules_ir/src/subgraph.rs | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 hercules_ir/src/subgraph.rs diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 8046498a..094873f2 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -4,6 +4,7 @@ pub mod dom; pub mod dot; pub mod ir; pub mod parse; +pub mod subgraph; pub mod typecheck; pub mod verify; @@ -13,5 +14,6 @@ pub use crate::dom::*; pub use crate::dot::*; pub use crate::ir::*; pub use crate::parse::*; +pub use crate::subgraph::*; pub use crate::typecheck::*; pub use crate::verify::*; diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs new file mode 100644 index 00000000..b0a37d82 --- /dev/null +++ b/hercules_ir/src/subgraph.rs @@ -0,0 +1,134 @@ +use crate::*; + +use std::collections::HashMap; + +/* + * In various parts of the compiler, we want to consider a subset of a complete + * function graph. For example, for dominators, we often only want to find the + * dominator tree of only the control subgraph. + */ +#[derive(Debug, Clone)] +pub struct Subgraph { + nodes: Vec<NodeID>, + node_numbers: HashMap<NodeID, u32>, + first_forward_edges: Vec<u32>, + forward_edges: Vec<u32>, + first_backward_edges: Vec<u32>, + backward_edges: Vec<u32>, +} + +/* + * Top level subgraph construction routine. Takes a function reference and a + * predicate - the predicate selects which nodes from the function will be + * included in the subgraph. An edge is added to the subgraph if it's between + * two nodes that each pass the predicate. 
+ */ +pub fn subgraph<F>(function: &Function, def_use: &ImmutableDefUseMap, predicate: F) -> Subgraph +where + F: Fn(&Node) -> bool, +{ + let mut subgraph = Subgraph { + nodes: vec![], + node_numbers: HashMap::new(), + first_forward_edges: vec![], + forward_edges: vec![], + first_backward_edges: vec![], + backward_edges: vec![], + }; + + // Step 1: collect predicated nodes. + for (idx, node) in function.nodes.iter().enumerate() { + if predicate(node) { + subgraph + .node_numbers + .insert(NodeID::new(idx), subgraph.nodes.len() as u32); + subgraph.nodes.push(NodeID::new(idx)); + } + } + + // Step 2: collect backwards edges. This is fairly easy, since use-def + // edges are explicitly stored. + for id in subgraph.nodes.iter() { + subgraph + .first_backward_edges + .push(subgraph.backward_edges.len() as u32); + let uses = get_uses(&function.nodes[id.idx()]); + for use_id in uses.as_ref() { + // Any predecessor node that satisfies the predicate already got + // added to node numbers. We need to get the node number anyway, + // so we don't have to do a redundant predicate check. + if let Some(number) = subgraph.node_numbers.get(use_id) { + subgraph.backward_edges.push(*number); + } + } + } + + // Step 3: collect forwards edges. This is also easy, since we already have + // the def-use edges of this function. + for id in subgraph.nodes.iter() { + subgraph + .first_forward_edges + .push(subgraph.forward_edges.len() as u32); + + // Only difference is that we iterate over users, not uses. + let users = def_use.get_users(*id); + for user_id in users.as_ref() { + // Any successor node that satisfies the predicate already got + // added to node numbers. We need to get the node number anyway, + // so we don't have to do a redundant predicate check. + if let Some(number) = subgraph.node_numbers.get(user_id) { + subgraph.forward_edges.push(*number); + } + } + } + + subgraph +} + +/* + * Get the control subgraph of a function. 
+ */ +pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Subgraph { + use Node::*; + + subgraph(function, def_use, |node| match node { + Start + | Region { preds: _ } + | If { + control: _, + cond: _, + } + | Fork { + control: _, + factor: _, + } + | Join { + control: _, + data: _, + } + | Return { + control: _, + value: _, + } + | Match { control: _, sum: _ } => true, + ReadProd { prod, index } => match function.nodes[prod.idx()] { + // ReadProd nodes are control nodes if their predecessor is a + // legal control node, and if it's the right index. + Match { control: _, sum: _ } + | If { + control: _, + cond: _, + } => true, + Fork { + control: _, + factor: _, + } + | Join { + control: _, + data: _, + } => *index == 0, + _ => false, + }, + _ => false, + }) +} -- GitLab From 70c8c1fb57b9625fc45702dc6704edaa7cacc8a8 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 3 Oct 2023 14:15:03 -0500 Subject: [PATCH 087/105] Rewrite dominator analysis to use generic subgraph --- hercules_ir/src/dom.rs | 136 ++++-------------------------------- hercules_ir/src/subgraph.rs | 62 ++++++++++++++++ hercules_ir/src/verify.rs | 5 +- 3 files changed, 79 insertions(+), 124 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 8a53b116..1d0c880f 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -44,14 +44,9 @@ impl DomTree { * Top level function for calculating dominator trees. Uses the semi-NCA * algorithm, as described in "Finding Dominators in Practice". */ -pub fn dominator(function: &Function) -> DomTree { - // Step 1: compute the sub-CFG for the function. This is the graph the - // dominator tree will be built for. - let backward_sub_cfg = control_nodes(function); - let forward_sub_cfg = reorient_sub_cfg(&backward_sub_cfg); - - // Step 2: compute pre-order DFS of CFG. 
- let (preorder, mut parents) = preorder(&forward_sub_cfg); +pub fn dominator(function: &Function, subgraph: &Subgraph) -> DomTree { + // Step 1: compute pre-order DFS of subgraph. + let (preorder, mut parents) = preorder(&subgraph); let mut node_numbers = HashMap::new(); for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); @@ -63,7 +58,7 @@ pub fn dominator(function: &Function) -> DomTree { idom.insert(*w, parents[w]); } - // Step 3: define snca_compress, which will be used to compute semi- + // Step 2: define snca_compress, which will be used to compute semi- // dominators, and initialize various variables. let mut semi = vec![0; preorder.len()]; let mut labels: Vec<_> = (0..preorder.len()).collect(); @@ -86,10 +81,10 @@ pub fn dominator(function: &Function) -> DomTree { (ancestors, labels) } - // Step 4: compute semi-dominators. + // Step 3: compute semi-dominators. for w_n in (1..preorder.len()).rev() { semi[w_n] = w_n; - for v in backward_sub_cfg[&preorder[w_n]].as_ref() { + for v in subgraph.preds(preorder[w_n]) { let v_n = node_numbers[&v]; (ancestors, labels) = snca_compress(v_n, ancestors, labels); semi[w_n] = std::cmp::min(semi[w_n], labels[v_n]); @@ -98,7 +93,7 @@ pub fn dominator(function: &Function) -> DomTree { ancestors[w_n] = node_numbers[&parents[&preorder[w_n]]]; } - // Step 5: compute idom. + // Step 4: compute idom. for v_n in 1..preorder.len() { let v = preorder[v_n]; while node_numbers[&idom[&v]] > semi[v_n] { @@ -109,130 +104,27 @@ pub fn dominator(function: &Function) -> DomTree { DomTree { idom } } -/* - * Enum for storing control uses of a node. Calculated alongside control nodes - * in control_nodes. 
- */ -#[derive(Debug, Clone)] -pub enum ControlUses<'a> { - Zero, - One([NodeID; 1]), - Variable(&'a Box<[NodeID]>), -} - -impl<'a> AsRef<[NodeID]> for ControlUses<'a> { - fn as_ref(&self) -> &[NodeID] { - match self { - ControlUses::Zero => &[], - ControlUses::One(x) => x, - ControlUses::Variable(x) => x, - } - } -} - -pub type BackwardSubCFG<'a> = HashMap<NodeID, ControlUses<'a>>; - -/* - * Top level function for getting all the control nodes in a function. Also - * returns the control uses of each control node, in effect returning the - * control subset of the IR graph. - */ -pub fn control_nodes(function: &Function) -> BackwardSubCFG { - use Node::*; - - let mut control_nodes = HashMap::new(); - for (idx, node) in function.nodes.iter().enumerate() { - match node { - Start => { - control_nodes.insert(NodeID::new(idx), ControlUses::Zero); - } - Region { preds } => { - control_nodes.insert(NodeID::new(idx), ControlUses::Variable(&preds)); - } - If { control, cond: _ } - | Fork { control, factor: _ } - | Join { control, data: _ } - | Return { control, value: _ } - | Match { control, sum: _ } => { - control_nodes.insert(NodeID::new(idx), ControlUses::One([*control])); - } - ReadProd { prod, index } => match function.nodes[prod.idx()] { - // ReadProd nodes are control nodes if their predecessor is a - // legal control node, and if it's the right index. - Match { control: _, sum: _ } - | If { - control: _, - cond: _, - } => { - control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); - } - Fork { - control: _, - factor: _, - } - | Join { - control: _, - data: _, - } => { - if *index == 0 { - control_nodes.insert(NodeID::new(idx), ControlUses::One([*prod])); - } - } - _ => {} - }, - _ => {} - } - } - control_nodes -} - -pub type ForwardSubCFG = HashMap<NodeID, Vec<NodeID>>; - -/* - * Utility for getting def-use edges of sub CFG. 
- */ -pub fn reorient_sub_cfg(backward: &BackwardSubCFG) -> ForwardSubCFG { - let mut forward = HashMap::new(); - - // Every control node needs to be a key in forward, even if it has no - // def-use edges originating from it (the return node), so explicitly add - // them all here. - for key in backward.keys() { - forward.insert(*key, vec![]); - } - - // Then, insert def-use edges. Unwrap since all keys are initialized above - // with empty vectors. - for (user, defs) in backward.iter() { - for def in defs.as_ref() { - forward.get_mut(def).unwrap().push(*user); - } - } - - forward -} - -fn preorder(forward_sub_cfg: &ForwardSubCFG) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { +fn preorder(subgraph: &Subgraph) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { // Initialize order vector and visited hashmap for tracking which nodes have // been visited. - let order = Vec::with_capacity(forward_sub_cfg.len()); + let order = Vec::with_capacity(subgraph.num_nodes() as usize); // Explicitly keep track of parents in DFS tree. Doubles as a visited set. let parents = HashMap::new(); // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. - preorder_helper(NodeID::new(0), None, forward_sub_cfg, order, parents) + preorder_helper(NodeID::new(0), None, subgraph, order, parents) } fn preorder_helper( node: NodeID, parent: Option<NodeID>, - forward_sub_cfg: &ForwardSubCFG, + subgraph: &Subgraph, mut order: Vec<NodeID>, mut parents: HashMap<NodeID, NodeID>, ) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { - assert!(forward_sub_cfg.contains_key(&node)); + assert!(subgraph.contains_node(node)); if parents.contains_key(&node) { // If already visited, return early. (order, parents) @@ -249,8 +141,8 @@ fn preorder_helper( order.push(node); // Iterate over users. 
- for user in forward_sub_cfg.get(&node).unwrap() { - (order, parents) = preorder_helper(*user, Some(node), forward_sub_cfg, order, parents); + for user in subgraph.succs(node) { + (order, parents) = preorder_helper(user, Some(node), subgraph, order, parents); } (order, parents) diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index b0a37d82..d90852a7 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -17,6 +17,68 @@ pub struct Subgraph { backward_edges: Vec<u32>, } +pub struct SubgraphIterator<'a> { + nodes: &'a Vec<NodeID>, + edges: &'a [u32], +} + +impl<'a> Iterator for SubgraphIterator<'a> { + type Item = NodeID; + + fn next(&mut self) -> Option<Self::Item> { + if self.edges.len() == 0 { + None + } else { + let id = self.edges[0]; + self.edges = &self.edges[1..]; + Some(self.nodes[id as usize]) + } + } +} + +impl Subgraph { + pub fn num_nodes(&self) -> u32 { + self.nodes.len() as u32 + } + + pub fn contains_node(&self, id: NodeID) -> bool { + self.node_numbers.contains_key(&id) + } + + pub fn preds(&self, id: NodeID) -> SubgraphIterator { + let number = self.node_numbers[&id]; + if ((number + 1) as usize) < self.first_backward_edges.len() { + SubgraphIterator { + nodes: &self.nodes, + edges: &self.backward_edges[(self.first_backward_edges[number as usize] as usize) + ..(self.first_backward_edges[number as usize + 1] as usize)], + } + } else { + SubgraphIterator { + nodes: &self.nodes, + edges: &self.backward_edges + [(self.first_backward_edges[number as usize] as usize)..], + } + } + } + + pub fn succs(&self, id: NodeID) -> SubgraphIterator { + let number = self.node_numbers[&id]; + if ((number + 1) as usize) < self.first_forward_edges.len() { + SubgraphIterator { + nodes: &self.nodes, + edges: &self.forward_edges[(self.first_forward_edges[number as usize] as usize) + ..(self.first_forward_edges[number as usize + 1] as usize)], + } + } else { + SubgraphIterator { + nodes: &self.nodes, + edges: 
&self.forward_edges[(self.first_forward_edges[number as usize] as usize)..], + } + } + } +} + /* * Top level subgraph construction routine. Takes a function reference and a * predicate - the predicate selects which nodes from the function will be diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 383dd3d1..65bac05b 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -33,8 +33,9 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { } // Check SSA, fork, and join dominance relations. - for function in module.functions.iter() { - let dom = dominator(&function); + for (function, def_use) in zip(module.functions.iter(), def_uses) { + let subgraph = control_subgraph(function, &def_use); + let dom = dominator(&function, &subgraph); } Ok(typing) -- GitLab From e340f128a83a009eec68162cbf81787ce2052faf Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Tue, 3 Oct 2023 21:26:38 -0500 Subject: [PATCH 088/105] Postdominator --- hercules_ir/src/dom.rs | 26 ++++++++++++--- hercules_ir/src/subgraph.rs | 63 +++++++++++++++++++++++++++++++++++++ hercules_ir/src/verify.rs | 5 ++- 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 1d0c880f..002cbd04 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -44,14 +44,14 @@ impl DomTree { * Top level function for calculating dominator trees. Uses the semi-NCA * algorithm, as described in "Finding Dominators in Practice". */ -pub fn dominator(function: &Function, subgraph: &Subgraph) -> DomTree { +pub fn dominator(subgraph: &Subgraph, root: NodeID) -> DomTree { // Step 1: compute pre-order DFS of subgraph. 
- let (preorder, mut parents) = preorder(&subgraph); + let (preorder, mut parents) = preorder(&subgraph, root); let mut node_numbers = HashMap::new(); for (number, node) in preorder.iter().enumerate() { node_numbers.insert(node, number); } - parents.insert(NodeID::new(0), NodeID::new(0)); + parents.insert(root, root); let mut idom = HashMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. @@ -104,7 +104,7 @@ pub fn dominator(function: &Function, subgraph: &Subgraph) -> DomTree { DomTree { idom } } -fn preorder(subgraph: &Subgraph) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { +fn preorder(subgraph: &Subgraph, root: NodeID) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { // Initialize order vector and visited hashmap for tracking which nodes have // been visited. let order = Vec::with_capacity(subgraph.num_nodes() as usize); @@ -114,7 +114,7 @@ fn preorder(subgraph: &Subgraph) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { // Order and parents are threaded through arguments / return pair of // reverse_postorder_helper for ownership reasons. - preorder_helper(NodeID::new(0), None, subgraph, order, parents) + preorder_helper(root, None, subgraph, order, parents) } fn preorder_helper( @@ -148,3 +148,19 @@ fn preorder_helper( (order, parents) } } + +/* + * Top level function for calculating post-dominator trees. Reverses the edges + * in the subgraph, and then runs normal dominator analysis. Takes an owned + * subgraph, since we need to reverse it. Also take a fake root node ID to + * insert in the reversed subgraph. This will be the root of the resulting + * dominator tree. + */ +pub fn postdominator(subgraph: Subgraph, fake_root: NodeID) -> DomTree { + // Step 1: reverse the subgraph. + let reversed_subgraph = subgraph.reverse(fake_root); + + // Step 2: run dominator analysis on the reversed subgraph. Use the fake + // root as the root of the dominator analysis. 
+ dominator(&reversed_subgraph, fake_root) +} diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index d90852a7..97203ab8 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -77,6 +77,69 @@ impl Subgraph { } } } + + pub fn reverse(self, new_root: NodeID) -> Self { + let Subgraph { + mut nodes, + mut node_numbers, + first_forward_edges, + forward_edges, + mut first_backward_edges, + mut backward_edges, + } = self; + + // Since we need to add a "new" root to the subgraph, we first need to + // identify all the nodes with no forward edges. We're going to + // simultaneously add the new backward edges from the old leaves to the + // new root. + let mut leaf_numbers = vec![]; + let mut new_first_forward_edges = vec![]; + let mut new_forward_edges = vec![]; + let mut old_forward_edges_idx = 0; + for number in 0..nodes.len() as u32 { + new_first_forward_edges.push(new_forward_edges.len() as u32); + let num_edges = if ((number + 1) as usize) < first_forward_edges.len() { + first_forward_edges[number as usize + 1] - first_forward_edges[number as usize] + } else { + forward_edges.len() as u32 - first_forward_edges[number as usize] + }; + if num_edges == 0 { + // Node number of new root will be largest in subgraph. + new_forward_edges.push(nodes.len() as u32); + leaf_numbers.push(number); + } else { + for _ in 0..num_edges { + new_forward_edges.push(forward_edges[old_forward_edges_idx]); + old_forward_edges_idx += 1; + } + } + } + + // There are no backward edges from the root node. + new_first_forward_edges.push(new_forward_edges.len() as u32); + + // To reverse the edges in the graph, just swap the forward and backward + // edge vectors. Thus, we add the forward edges from the new root to + // the old leaves in the backward edge arrays. 
+ node_numbers.insert(new_root, nodes.len() as u32); + nodes.push(new_root); + first_backward_edges.push(backward_edges.len() as u32); + for leaf in leaf_numbers.iter() { + backward_edges.push(*leaf); + } + + // Swap forward and backward edges. + assert!(nodes.len() == first_backward_edges.len()); + assert!(nodes.len() == new_first_forward_edges.len()); + Subgraph { + nodes, + node_numbers, + first_forward_edges: first_backward_edges, + forward_edges: backward_edges, + first_backward_edges: new_first_forward_edges, + backward_edges: new_forward_edges, + } + } } /* diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 65bac05b..a76e08a1 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -35,7 +35,10 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { // Check SSA, fork, and join dominance relations. for (function, def_use) in zip(module.functions.iter(), def_uses) { let subgraph = control_subgraph(function, &def_use); - let dom = dominator(&function, &subgraph); + let dom = dominator(&subgraph, NodeID::new(0)); + let postdom = postdominator(subgraph, NodeID::new(function.nodes.len())); + println!("{:?}", dom); + println!("{:?}", postdom); } Ok(typing) -- GitLab From a1f2cbba9674f28b97350f6eca68857c3189ca41 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 10:46:57 -0500 Subject: [PATCH 089/105] Add thread ID and collect nodes --- hercules_ir/src/dataflow.rs | 6 +- hercules_ir/src/def_use.rs | 6 +- hercules_ir/src/dot.rs | 32 ++++++-- hercules_ir/src/ir.rs | 57 ++++++++------ hercules_ir/src/parse.rs | 32 ++++++-- hercules_ir/src/subgraph.rs | 15 +--- hercules_ir/src/typecheck.rs | 149 ++++++++++++++++++++++------------- hercules_ir/src/verify.rs | 31 +++++--- samples/matmul.hir | 20 ++--- 9 files changed, 216 insertions(+), 132 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 9dfa6867..69a00b65 100644 --- 
a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -31,7 +31,7 @@ pub fn forward_dataflow<L, F>( ) -> Vec<L> where L: Semilattice, - F: FnMut(&[&L], &Node) -> L, + F: FnMut(&[&L], NodeID) -> L, { // Step 1: compute NodeUses for each node in function. let uses: Vec<NodeUses> = function.nodes.iter().map(|n| get_uses(n)).collect(); @@ -41,7 +41,7 @@ where .map(|id| { flow_function( &vec![&(if id == 0 { L::bottom() } else { L::top() }); uses[id].as_ref().len()], - &function.nodes[id], + NodeID::new(id), ) }) .collect(); @@ -63,7 +63,7 @@ where } // Compute new "out" value from predecessor "out" values. - let new_out = flow_function(&pred_outs[..], &function.nodes[node_id.idx()]); + let new_out = flow_function(&pred_outs[..], *node_id); if outs[node_id.idx()] != new_out { change = true; } diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 0e750ca5..1eb578fa 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -108,13 +108,15 @@ pub fn get_uses<'a>(node: &'a Node) -> NodeUses<'a> { Node::Region { preds } => NodeUses::Variable(preds), Node::If { control, cond } => NodeUses::Two([*control, *cond]), Node::Fork { control, factor: _ } => NodeUses::One([*control]), - Node::Join { control, data } => NodeUses::Two([*control, *data]), + Node::Join { control } => NodeUses::One([*control]), Node::Phi { control, data } => { let mut uses: Vec<NodeID> = Vec::from(&data[..]); uses.push(*control); NodeUses::Phi(uses.into_boxed_slice()) } - Node::Return { control, value } => NodeUses::Two([*control, *value]), + Node::ThreadID { control } => NodeUses::One([*control]), + Node::Collect { control, data } => NodeUses::Two([*control, *data]), + Node::Return { control, data } => NodeUses::Two([*control, *data]), Node::Parameter { index: _ } => NodeUses::Zero, Node::Constant { id: _ } => NodeUses::Zero, Node::DynamicConstant { id: _ } => NodeUses::Zero, diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 
8623ca5e..f149a50d 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -94,16 +94,14 @@ fn write_node<W: std::fmt::Write>( )?; visited } - Node::Join { control, data } => { + Node::Join { control } => { write!(w, "{} [xlabel={}, label=\"join\"];\n", name, j)?; let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; - let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!( w, "{} -> {} [label=\"control\", style=\"dashed\"];\n", control_name, name )?; - write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; visited } Node::Phi { control, data } => { @@ -121,16 +119,38 @@ fn write_node<W: std::fmt::Write>( } visited } - Node::Return { control, value } => { + Node::ThreadID { control } => { + write!(w, "{} [xlabel={}, label=\"thread_id\"];\n", name, j)?; + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; + visited + } + Node::Collect { control, data } => { + let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; + write!(w, "{} [xlabel={}, label=\"collect\"];\n", name, j)?; + write!( + w, + "{} -> {} [label=\"control\", style=\"dashed\"];\n", + control_name, name + )?; + write!(w, "{} -> {} [label=\"data\"];\n", data_name, name)?; + visited + } + Node::Return { control, data } => { let (control_name, visited) = write_node(i, control.idx(), module, visited, w)?; - let (value_name, visited) = write_node(i, value.idx(), module, visited, w)?; + let (data_name, visited) = write_node(i, data.idx(), module, visited, w)?; write!(w, "{} [xlabel={}, label=\"return\"];\n", name, j)?; write!( w, "{} -> {} [label=\"control\", style=\"dashed\"];\n", control_name, name )?; - write!(w, "{} -> {} [label=\"value\"];\n", value_name, name)?; + write!(w, "{} -> {} [label=\"data\"];\n", 
data_name, name)?; visited } Node::Parameter { index } => { diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 2c943865..9400f026 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -145,13 +145,13 @@ pub enum Constant { /* * Dynamic constants are unsigned 64-bit integers passed to a Hercules function - * at runtime using the Hercules runtime API. They cannot be the result of + * at runtime using the Hercules conductor API. They cannot be the result of * computations in Hercules IR. For a single execution of a Hercules function, * dynamic constants are constant throughout execution. This provides a * mechanism by which Hercules functions can operate on arrays with variable * length, while not needing Hercules functions to perform dynamic memory - * allocation - by providing dynamic constants to the runtime API, the runtime - * can allocate memory as necessary. + * allocation - by providing dynamic constants to the conductor API, the + * conductor can allocate memory as necessary. */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DynamicConstant { @@ -161,15 +161,14 @@ pub enum DynamicConstant { /* * Hercules IR is a combination of a possibly cylic control flow graph, and - * many acyclic data flow graphs. Each node represents some operation on input - * values (including control), and produces some output value. Operations that - * conceptually produce multiple outputs (such as an if node) produce a product - * type instead. For example, the if node produces prod(control(N), + * many possibly cyclic data flow graphs. Each node represents some operation on + * input values (including control), and produces some output value. Operations + * that conceptually produce multiple outputs (such as an if node) produce a + * product type instead. For example, the if node produces prod(control(N), * control(N)), where the first control token represents the false branch, and - * the second control token represents the true branch. 
Another example is the - * fork node, which produces prod(control(N, K), u64), where the u64 is the - * thread ID. Functions are devoid of side effects, so call nodes don't take as - * input or output control tokens. There is also no global memory - use arrays. + * the second control token represents the true branch. Functions are devoid of + * side effects, so call nodes don't take as input or output control tokens. + * There is also no global memory - use arrays. */ #[derive(Debug, Clone)] pub enum Node { @@ -187,15 +186,21 @@ pub enum Node { }, Join { control: NodeID, - data: NodeID, }, Phi { control: NodeID, data: Box<[NodeID]>, }, + ThreadID { + control: NodeID, + }, + Collect { + control: NodeID, + data: NodeID, + }, Return { control: NodeID, - value: NodeID, + data: NodeID, }, Parameter { index: usize, @@ -284,7 +289,7 @@ impl Node { pub fn is_return(&self) -> bool { if let Node::Return { control: _, - value: _, + data: _, } = self { true @@ -305,17 +310,19 @@ impl Node { control: _, factor: _, } => "Fork", - Node::Join { - control: _, - data: _, - } => "Join", + Node::Join { control: _ } => "Join", Node::Phi { control: _, data: _, } => "Phi", + Node::ThreadID { control: _ } => "ThreadID", + Node::Collect { + control: _, + data: _, + } => "Collect", Node::Return { control: _, - value: _, + data: _, } => "Return", Node::Parameter { index: _ } => "Parameter", Node::DynamicConstant { id: _ } => "DynamicConstant", @@ -368,17 +375,19 @@ impl Node { control: _, factor: _, } => "fork", - Node::Join { - control: _, - data: _, - } => "join", + Node::Join { control: _ } => "join", Node::Phi { control: _, data: _, } => "phi", + Node::ThreadID { control: _ } => "thread_id", + Node::Collect { + control: _, + data: _, + } => "collect", Node::Return { control: _, - value: _, + data: _, } => "return", Node::Parameter { index: _ } => "parameter", Node::DynamicConstant { id: _ } => "dynamic_constant", diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index 
a54f8109..fcabe771 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -275,6 +275,8 @@ fn parse_node<'a>( "fork" => parse_fork(ir_text, context)?, "join" => parse_join(ir_text, context)?, "phi" => parse_phi(ir_text, context)?, + "thread_id" => parse_thread_id(ir_text, context)?, + "collect" => parse_collect(ir_text, context)?, "return" => parse_return(ir_text, context)?, "constant" => parse_constant_node(ir_text, context)?, "dynamic_constant" => parse_dynamic_constant_node(ir_text, context)?, @@ -365,15 +367,14 @@ fn parse_fork<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes } fn parse_join<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { - let (ir_text, (control, data)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let (ir_text, (control,)) = parse_tuple1(parse_identifier)(ir_text)?; let control = context.borrow_mut().get_node_id(control); - let data = context.borrow_mut().get_node_id(data); // A join node doesn't need to explicitly store a join factor. The join // factor is implicitly stored at the tail of the control token's type // level list of thread spawn factors. Intuitively, fork pushes to the end // of this list, while join just pops from the end of this list. 
- Ok((ir_text, Node::Join { control, data })) + Ok((ir_text, Node::Join { control })) } fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResult<&'a str, Node> { @@ -396,14 +397,33 @@ fn parse_phi<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IResu Ok((ir_text, Node::Phi { control, data })) } +fn parse_thread_id<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (control,)) = parse_tuple1(parse_identifier)(ir_text)?; + let control = context.borrow_mut().get_node_id(control); + Ok((ir_text, Node::ThreadID { control })) +} + +fn parse_collect<'a>( + ir_text: &'a str, + context: &RefCell<Context<'a>>, +) -> nom::IResult<&'a str, Node> { + let (ir_text, (control, data)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let control = context.borrow_mut().get_node_id(control); + let data = context.borrow_mut().get_node_id(data); + Ok((ir_text, Node::Collect { control, data })) +} + fn parse_return<'a>( ir_text: &'a str, context: &RefCell<Context<'a>>, ) -> nom::IResult<&'a str, Node> { - let (ir_text, (control, value)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; + let (ir_text, (control, data)) = parse_tuple2(parse_identifier, parse_identifier)(ir_text)?; let control = context.borrow_mut().get_node_id(control); - let value = context.borrow_mut().get_node_id(value); - Ok((ir_text, Node::Return { control, value })) + let data = context.borrow_mut().get_node_id(data); + Ok((ir_text, Node::Return { control, data })) } fn parse_constant_node<'a>( diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index 97203ab8..a75bc637 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -227,13 +227,10 @@ pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Su control: _, factor: _, } - | Join { - control: _, - data: _, - } + | Join { control: _ } | Return { control: _, - value: _, + data: 
_, } | Match { control: _, sum: _ } => true, ReadProd { prod, index } => match function.nodes[prod.idx()] { @@ -244,14 +241,6 @@ pub fn control_subgraph(function: &Function, def_use: &ImmutableDefUseMap) -> Su control: _, cond: _, } => true, - Fork { - control: _, - factor: _, - } - | Join { - control: _, - data: _, - } => *index == 0, _ => false, }, _ => false, diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index cab2f250..6722a66d 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -98,6 +98,10 @@ pub fn typecheck( .map(|(idx, ty)| (ty.clone(), TypeID::new(idx))) .collect(); + // Also create a join replication factor map. This is needed to typecheck + // collect node. + let mut join_factor_map: HashMap<NodeID, DynamicConstantID> = HashMap::new(); + // Step 2: run dataflow. This is an occurrence of dataflow where the flow // function performs a non-associative operation on the predecessor "out" // values. @@ -113,6 +117,7 @@ pub fn typecheck( constants, dynamic_constants, &mut reverse_type_map, + &mut join_factor_map, ) }) }) @@ -143,13 +148,14 @@ pub fn typecheck( */ fn typeflow( inputs: &[&TypeSemilattice], - node: &Node, + node_id: NodeID, function: &Function, functions: &Vec<Function>, types: &mut Vec<Type>, constants: &Vec<Constant>, dynamic_constants: &Vec<DynamicConstant>, reverse_type_map: &mut HashMap<Type, TypeID>, + join_factor_map: &mut HashMap<NodeID, DynamicConstantID>, ) -> TypeSemilattice { // Whenever we want to reference a specific type (for example, for the // start node), we need to get its type ID. This helper function gets the @@ -170,7 +176,7 @@ fn typeflow( // Each node requires different type logic. This unfortunately results in a // large match statement. Oh well. Each arm returns the lattice value for // the "out" type of the node. 
- match node { + match &function.nodes[node_id.idx()] { Node::Start => { if inputs.len() != 0 { return Error(String::from("Start node must have zero inputs.")); @@ -253,18 +259,14 @@ fn typeflow( let mut new_factors = factors.clone().into_vec(); new_factors.push(*factor); - // Out type is a pair - first element is the control type, - // second is the index type (u64). Each thread gets a - // different thread ID at runtime. + // Out type is control type, with the new thread spawn + // factor. let control_out_id = get_type_id( Type::Control(new_factors.into_boxed_slice()), types, reverse_type_map, ); - let index_out_id = - get_type_id(Type::UnsignedInteger64, types, reverse_type_map); - let out_ty = Type::Product(Box::new([control_out_id, index_out_id])); - return Concrete(get_type_id(out_ty, types, reverse_type_map)); + return Concrete(control_out_id); } else { return Error(String::from( "Fork node's input cannot have non-control type.", @@ -274,59 +276,38 @@ fn typeflow( inputs[0].clone() } - Node::Join { - control: _, - data: _, - } => { - if inputs.len() != 2 { + Node::Join { control: _ } => { + if inputs.len() != 1 { return Error(String::from("Join node must have exactly two inputs.")); } - // If the data input isn't concrete, we can't assemble a concrete - // output type yet, so just return data input's type (either - // unconstrained or error) instead. - if let Concrete(data_id) = inputs[1] { - if types[data_id.idx()].is_control() { - return Error(String::from( - "Join node's second input must not have a control type.", - )); - } - - // Similarly, if the control input isn't concrete yet, we can't - // assemble a concrete output type, so just return the control - // input non-concrete type. - if let Concrete(control_id) = inputs[0] { - if let Type::Control(factors) = &types[control_id.idx()] { - // Join removes a factor from the factor list. 
- if factors.len() == 0 { - return Error(String::from("Join node's first input must have a control type with at least one thread replication factor.")); - } - let mut new_factors = factors.clone().into_vec(); - let dc_id = new_factors.pop().unwrap(); - - // Out type is a pair - first element is the control - // type, second is the result array from the parallel - // computation. - let control_out_id = get_type_id( - Type::Control(new_factors.into_boxed_slice()), - types, - reverse_type_map, - ); - let array_out_id = - get_type_id(Type::Array(*data_id, dc_id), types, reverse_type_map); - let out_ty = Type::Product(Box::new([control_out_id, array_out_id])); - return Concrete(get_type_id(out_ty, types, reverse_type_map)); - } else { - return Error(String::from( - "Join node's first input cannot have non-control type.", - )); + // If the control input isn't concrete yet, we can't assemble a + // concrete output type, so just return the control input non- + // concrete type. + if let Concrete(control_id) = inputs[0] { + if let Type::Control(factors) = &types[control_id.idx()] { + // Join removes a factor from the factor list. + if factors.len() == 0 { + return Error(String::from("Join node's first input must have a control type with at least one thread replication factor.")); } + let mut new_factors = factors.clone().into_vec(); + join_factor_map.insert(node_id, new_factors.pop().unwrap()); + + // Out type is the new control type. 
+ let control_out_id = get_type_id( + Type::Control(new_factors.into_boxed_slice()), + types, + reverse_type_map, + ); + return Concrete(control_out_id); } else { - return inputs[0].clone(); + return Error(String::from( + "Join node's first input cannot have non-control type.", + )); } } - inputs[1].clone() + inputs[0].clone() } Node::Phi { control: _, @@ -365,9 +346,67 @@ fn typeflow( meet } + Node::ThreadID { control: _ } => { + if inputs.len() != 1 { + return Error(String::from("ThreadID node must have exactly one input.")); + } + + // If type of control input is an error, we must propagate it. + if inputs[0].is_error() { + return inputs[0].clone(); + } + + // Type of thread ID is always u64. + Concrete(get_type_id( + Type::UnsignedInteger64, + types, + reverse_type_map, + )) + } + Node::Collect { control, data: _ } => { + if inputs.len() != 2 { + return Error(String::from("Collect node must have exactly two inputs.")); + } + + if let (Concrete(control_id), Concrete(data_id)) = (inputs[0], inputs[1]) { + // Check control input is control. + if let Type::Control(_) = types[control_id.idx()] { + } else { + return Error(String::from( + "Collect node's control input must have control type.", + )); + } + + // Check data input isn't control. + if let Type::Control(_) = types[data_id.idx()] { + return Error(String::from( + "Collect node's data input must not have control type.", + )); + } + + // Unfortunately, the type of the control input doesn't contain + // the thread replication factor this collect node is operating + // with. We use the join replication factor map side data + // structure to store the replication factor each join reduces + // over to make this easier. + if let Some(factor) = join_factor_map.get(control) { + let array_out_id = + get_type_id(Type::Array(*data_id, *factor), types, reverse_type_map); + Concrete(array_out_id) + } else { + // If the join factor map doesn't contain the control + // input, stay optimistic. 
+ Unconstrained + } + } else if inputs[0].is_error() { + inputs[0].clone() + } else { + inputs[1].clone() + } + } Node::Return { control: _, - value: _, + data: _, } => { if inputs.len() != 2 { return Error(String::from("Return node must have exactly two inputs.")); diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index a76e08a1..89192c01 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -58,20 +58,11 @@ fn verify_structure( for (idx, node) in function.nodes.iter().enumerate() { let users = def_use.get_users(NodeID::new(idx)); match node { - // If, fork, and join nodes all have the same structural - // constraints - each must have exactly two ReadProd users, which + // Each if node must have exactly two ReadProd users, which // reference differing elements of the node's output product. Node::If { control: _, cond: _, - } - | Node::Fork { - control: _, - factor: _, - } - | Node::Join { - control: _, - data: _, } => { if users.len() != 2 { Err(format!( @@ -110,10 +101,28 @@ fn verify_structure( Err("Phi node's control input must be a region node.")?; } } + // ThreadID nodes must depend on a fork node. + Node::ThreadID { control } => { + if let Node::Fork { + control: _, + factor: _, + } = function.nodes[control.idx()] + { + } else { + Err("ThreadID node's control input must be a fork node.")?; + } + } + // Collect nodes must depend on a join node. + Node::Collect { control, data: _ } => { + if let Node::Join { control: _ } = function.nodes[control.idx()] { + } else { + Err("Collect node's control input must be a join node.")?; + } + } // Return nodes must have no users. 
Node::Return { control: _, - value: _, + data: _, } => { if users.len() != 0 { Err(format!( diff --git a/samples/matmul.hir b/samples/matmul.hir index 511bdfa8..af13ce95 100644 --- a/samples/matmul.hir +++ b/samples/matmul.hir @@ -1,10 +1,8 @@ fn matmul<3>(a: array(array(f32, #1), #0), b: array(array(f32, #2), #1)) -> array(array(f32, #2), #0) - i = fork(start, #0) - i_ctrl = read_prod(i, 0) - i_idx = read_prod(i, 1) - k = fork(i_ctrl, #2) - k_ctrl = read_prod(k, 0) - k_idx = read_prod(k, 1) + i_ctrl = fork(start, #0) + i_idx = thread_id(i_ctrl) + k_ctrl = fork(i_ctrl, #2) + k_idx = thread_id(k_ctrl) zero_idx = constant(u64, 0) one_idx = constant(u64, 1) zero_val = constant(f32, 0) @@ -23,10 +21,8 @@ fn matmul<3>(a: array(array(f32, #1), #0), b: array(array(f32, #2), #1)) -> arra if = if(loop, less) if_false = read_prod(if, 0) if_true = read_prod(if, 1) - k_join = join(if_false, sum_inc) - k_join_ctrl = read_prod(k_join, 0) - k_join_data = read_prod(k_join, 1) - i_join = join(k_join_ctrl, k_join_data) - i_join_ctrl = read_prod(i_join, 0) - i_join_data = read_prod(i_join, 1) + k_join_ctrl = join(if_false) + k_join_data = collect(k_join_ctrl, sum_inc) + i_join_ctrl = join(k_join_ctrl) + i_join_data = collect(i_join_ctrl, k_join_data) r = return(i_join_ctrl, i_join_data) -- GitLab From 54894503423fef8841e6caff88a008039f35dd34 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 14:33:53 -0500 Subject: [PATCH 090/105] Update design.md --- DESIGN.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index cd51e630..10e34d64 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -24,15 +24,19 @@ The Hercules' compiler is split into the following components: The IR of the Hercules compiler is similar to the sea of nodes IR presented in "A Simple Graph-Based Intermediate Representation", with a few differences. 
-- There are dynamic constants, which are constants provided dynamically to the runtime system - these can be used to specify array type sizes, unlike normal runtime values. +- There are dynamic constants, which are constants provided dynamically to the conductor (this is the runtime system, [see the section describing the conductor](#the-conductor)) - these can be used to specify array type sizes, unlike normal runtime values. - There is no single global store. The closest analog are individual values with an array type, which support dynamically indexed read and write operations. - There is no I/O, or other side effects. - There is no recursion. -- The implementation of Hercules IR does not follow the original object oriented design. +- The implementation of Hercules IR does not follow the original object oriented design of sea-of-nodes. A key design consideration of Hercules IR is the absence of a concept of memory. A downside of this approach is that any language targetting Hecules IR must also be very restrictive regarding memory - in practice, this means tightly controlling or eliminating first-class references. The upside is that the compiler has complete freedom to layout data however it likes in memory when performing code generation. This includes deciding which data resides in which address spaces, which is a necessary ability for a compiler striving to have fine-grained control over what operations are computed on what devices. -In addition to not having a generalized memory, Hercules IR has no functionality for calling functions with side-effects, or doing IO. In other words, Hercules is a pure IR (it's not functional, as functions aren't first class values). This may be changed in the future - we could support effectful programs by giving call operators a control input and output edge. However, at least for now, we need to work with the simplest IR possible, so the IR is pure. 
+In addition to not having a generalized memory, Hercules IR has no functionality for calling functions with side-effects, or doing IO. In other words, Hercules is a pure IR (it's not functional, as functions aren't first class values). This may be changed in the future - we could support effectful programs by giving call operators a control input and output edge. However, at least for now, we'd like to work with the simplest IR possible, so the IR is pure. + +The key idea behind the sea of nodes IR is that control flow and data flow are represented in the same graph. The entire program thus can be represented by one large flow graph. This has several nice properties, the primary of which being that instructions are unordered except by true dependencies. This alleviates most code motion concerns, and also makes peephole optimizations more practical. Additionally, loop invariant code is neither "inside" nor "outside" a loop in the sea of nodes. Thus, any optimizations benefitting from a particular assumption about the position of loop invariant code works without needing to do code motion. Deciding whether code lives inside a loop or not becomes a scheduling concern. + +We chose to use a sea of nodes based IR because we believe it will be easier to partition than a CFG + basic block style IR. A CFG + basic block IR is inherently two-level - there is the control flow level in the CFG, and the data flow in the basic blocks. Partitioning a function across these two levels is a challenging task. As shown by previous work (HPVM), introducing more graph levels into the IR makes partitioning harder, not easier. We want Hercules to have fine-grained control over which code executes where. This requires Hercules' compiler IR to have as few graph levels as reasonable. ### Optimizations @@ -42,7 +46,7 @@ TODO: @rarbore2 ### Partitioning -Partitioning is responsible for deciding which operations in the IR graph are executed on which devices. 
Additionally, operations are broken up into shards - every node in a shard executes on the same device, and the runtime system schedules execution at the shard level. Partitioning is conceptually very similar to instruction selection. Each shard can be thought of as a single instruction, and the device the shard is executed on can be thought of as the particular instruction being selected. In instruction selection, there is not only the choice of which instructions to use, but also how to partition the potentially many operations in the IR into a smaller number of target instructions. Similarly, partitioning Hercules IR must decide which operations are grouped together into the same shard, and for each shard, which device it should execute on. The set of operations each potential target device is capable of executing is crucial information when forming the shard boundaries, so this cannot be performed optimally as a sequential two step process. +Partitioning is responsible for deciding which operations in the IR graph are executed on which devices. Additionally, operations are broken up into shards - every node in a shard executes on the same device, and the runtime system schedules execution at the shard level. Partitioning is conceptually very similar to instruction selection. Each shard can be thought of as a single instruction, and the device the shard is executed on can be thought of as the particular instruction being selected. In instruction selection, there is not only the choice of which instructions to use, but also how to partition the potentially many operations in the IR into a smaller number of target instructions. Similarly, the Hercules IR partitioning process must decide which operations are grouped together into the same shard, and for each shard, which device it should execute on. 
The set of operations each potential target device is capable of executing is crucial information when forming the shard boundaries, so this cannot be performed optimally as a sequential two step process. TODO: @rarbore2 @@ -52,8 +56,8 @@ Hercules uses LLVM for generating CPU and GPU code. Memory is "introduced" into TODO: @rarbore2 -## Runtime System +## The Conductor -The runtime system is responsible for dynamically executing code generated by Hercules. It exposes a Rust API for executing Hercules code. It takes care of memory allocation, synchronization, and scheduling. +The conductor is responsible for dynamically executing code generated by Hercules. It exposes a Rust API for executing Hercules code. It takes care of memory allocation, synchronization, and scheduling. It is what is called the "runtime" in other systems - we chose a different name as there are events that happen distinctly as "conductor time" (such as providing dynamic constants), rather than at "runtime" (where the generated code is actually executed). TODO: @rarbore2 -- GitLab From fdc42d76257d3a1f113f57243af77203cbf3f6f6 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 16:50:51 -0500 Subject: [PATCH 091/105] Update DESIGN.md --- DESIGN.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/DESIGN.md b/DESIGN.md index 10e34d64..8764c9fe 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -8,6 +8,8 @@ Hercules' is a compiler targeting heterogenous devices. The key goals of Hercule - Design an intermediate representation that allows for fine-grained control of what code is executed on what device in a system. - Develop a runtime system capable of dynamically scheduling generated code fragments on a heterogenous machine. +The following sections contain information on how Hercules is designed to meet these goals. 
+ ## Front-end Language Design TODO: @aaronjc4 @@ -38,6 +40,14 @@ The key idea behind the sea of nodes IR is that control flow and data flow are r We chose to use a sea of nodes based IR because we believe it will be easier to partition than a CFG + basic block style IR. A CFG + basic block IR is inherently two-level - there is the control flow level in the CFG, and the data flow in the basic blocks. Partitioning a function across these two levels is a challenging task. As shown by previous work (HPVM), introducing more graph levels into the IR makes partitioning harder, not easier. We want Hercules to have fine-grained control over which code executes where. This requires Hercules' compiler IR to have as few graph levels as reasonable. +Hercules IR is structured as following: +- One entire program lives in one "Module". +- Each module contains a set of functions, as well as interned types, constants, and dynamic constants. The most important element of a module is its resident functions. +- Each function consists of a name, a set of types for its parameters, a return type, a list of nodes, and the number of dynamic constants it takes as argument. Types are not needed for dynamic constants, since all dynamic constants have type u64. The most important element of a function is its node list. +- There are control and data types. The control type is parameterized by a list of thread replication factors. The primitive data types are boolean, signed integers, unsigned integers, and floating point numbers. The integer types can hold 8, 16, 32, or 64 bits. The floating point types can hold 32 or 64 bits. The compound types are product, summation, and arrays. A product type is a tuple, containing some number of children data types. A summation type is a union, containing exactly one of some number of children data types at runtime. An array is a dynamically indexable collection of elements, where each element is the same type. 
The size of the array is part of the type, and is represented with a dynamic constant. +- Dynamic constants are constants provided to the conductor when a Hercules IR program is started. Through this mechanism, Hercules IR can represent programs operating on a variable number of array elements, while forbidding runtime dynamic memory allocation (all dynamic memory allocation happens in the conductor). +- The nodes in a function are structured as a flow graph, which an explicit start node. Although control and data flow from definitions to uses, def-use edges are stored implicitly in the IR. Each node stores its predecessor nodes, so use-def edges are stored explicitly. To query the def-use edges in an IR graph, use the `def_use` function. + ### Optimizations Hercules relies on other compiler infrastructures, such as LLVM, to do code generation for specific devices. Thus, Hercules itself doesn't perform particularly sophisticated optimizations. In general, the optimizations Hercules do are done to make partitioning easier. This includes things like GVN and peephole optimizations, which in general, make the IR "simpler". -- GitLab From 92438ba3fccc50f3dc85b5a9860655b44f04f8c9 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 17:16:13 -0500 Subject: [PATCH 092/105] Start documenting nodes --- DESIGN.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/DESIGN.md b/DESIGN.md index 8764c9fe..49738e69 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -48,6 +48,28 @@ Hercules IR is structured as following: - Dynamic constants are constants provided to the conductor when a Hercules IR program is started. Through this mechanism, Hercules IR can represent programs operating on a variable number of array elements, while forbidding runtime dynamic memory allocation (all dynamic memory allocation happens in the conductor). - The nodes in a function are structured as a flow graph, which an explicit start node. 
Although control and data flow from definitions to uses, def-use edges are stored implicitly in the IR. Each node stores its predecessor nodes, so use-def edges are stored explicitly. To query the def-use edges in an IR graph, use the `def_use` function. +Below, all of the nodes in Hercules IR are described. + +#### Start + +The start node of the IR flow graph. This node is implicitly defined in the text format. It takes no inputs. Its output type is the empty control type (control with no thread replication factors). + +#### Region + +Region nodes are the mechanism for merging multiple branches inside Hercules IR. A region node takes at least one input - each input must have a control type, and all of the inputs must have the same control type. The output type of the region node is the same control type as all of its inputs. The main purpose of a region node is to drive a [phi](#phi) node. + +#### If + +The branch mechanism in Hercules IR. An if node takes two inputs - a control predecessor, and a condition. The control predecessor must have control type, and the condition must have boolean type. The output type is a product of two control types, which are the same as the control input's type. Every if node must be followed directly by two [read_prod](#readprod) nodes, each of which reads differing elements of the if node's output product. This is the mechanism by which the output edges from the if node (and also the [match](#match) node) are labelled, even though nodes only explicitly store their input edges. + +#### Fork + +Fork (and [join](#join)) nodes are the mechanism for representing data-parallelism inside Hercules IR. A fork node takes one input - a control predecessor. A fork node also stores a thread replication factor (TRF), represented as a dynamic constant. The output type of a fork node is a control type, which is the same as the type of the control predecessor, with the TRF pushed to the end of the control type's factor list. 
Conceptually, for every thread that comes in to a fork node, TRF threads come out. A fork node can drive any number of children [thread_id](#threadid) nodes. Each fork must have a single corresponding [join](#join) node - the fork must dominate the join node, and the join node must post-dominate the fork node (in the control flow subgraph). + +#### Join + +Join (and [fork](#fork)) nodes are the mechanism for synchronizing data-parallel threads inside Hercules IR. A join nodes takes one input - a control predecessor. The output type of a join node is a control type, which is the same as the type of the control predecessor, with the last factor in the control type's list removed. Conceptually, after all threads created by the corresponding fork reach the join, then and only then does the join output a single thread. A join node can drive any number of children [collect](#collect) nodes. Each join must have a single corresponding [fork](#fork) node - the join must post-dominate the fork node, and the fork node must dominate the join node (in the control flow subgraph). + ### Optimizations Hercules relies on other compiler infrastructures, such as LLVM, to do code generation for specific devices. Thus, Hercules itself doesn't perform particularly sophisticated optimizations. In general, the optimizations Hercules do are done to make partitioning easier. This includes things like GVN and peephole optimizations, which in general, make the IR "simpler". -- GitLab From 839c09105d7fd416061936f8d8a3228250e08876 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 17:43:26 -0500 Subject: [PATCH 093/105] Document more nodes --- DESIGN.md | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index 49738e69..8f18d3a3 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -56,20 +56,48 @@ The start node of the IR flow graph. 
This node is implicitly defined in the text #### Region -Region nodes are the mechanism for merging multiple branches inside Hercules IR. A region node takes at least one input - each input must have a control type, and all of the inputs must have the same control type. The output type of the region node is the same control type as all of its inputs. The main purpose of a region node is to drive a [phi](#phi) node. +Region nodes are the mechanism for merging multiple branches inside Hercules IR. A region node takes at least one input - each input must have a control type, and all of the inputs must have the same control type. The output type of the region node is the same control type as all of its inputs. The main purpose of a region node is to drive some number of [phi](#phi) nodes. #### If -The branch mechanism in Hercules IR. An if node takes two inputs - a control predecessor, and a condition. The control predecessor must have control type, and the condition must have boolean type. The output type is a product of two control types, which are the same as the control input's type. Every if node must be followed directly by two [read_prod](#readprod) nodes, each of which reads differing elements of the if node's output product. This is the mechanism by which the output edges from the if node (and also the [match](#match) node) are labelled, even though nodes only explicitly store their input edges. +The branch mechanism in Hercules IR. An if node takes two inputs - a control predecessor, and a condition. The control predecessor must have control type, and the condition must have boolean type. The output type is a product of two control types, which are the same as the control input's type. Every if node must be followed directly by two [read\_prod](#readprod) nodes, each of which reads differing elements of the if node's output product. 
This is the mechanism by which the output edges from the if node (and also the [match](#match) node) are labelled, even though nodes only explicitly store their input edges. #### Fork -Fork (and [join](#join)) nodes are the mechanism for representing data-parallelism inside Hercules IR. A fork node takes one input - a control predecessor. A fork node also stores a thread replication factor (TRF), represented as a dynamic constant. The output type of a fork node is a control type, which is the same as the type of the control predecessor, with the TRF pushed to the end of the control type's factor list. Conceptually, for every thread that comes in to a fork node, TRF threads come out. A fork node can drive any number of children [thread_id](#threadid) nodes. Each fork must have a single corresponding [join](#join) node - the fork must dominate the join node, and the join node must post-dominate the fork node (in the control flow subgraph). +Fork (and [join](#join)) nodes are the mechanism for representing data-parallelism inside Hercules IR. A fork node takes one input - a control predecessor. A fork node also stores a thread replication factor (TRF), represented as a dynamic constant. The output type of a fork node is a control type, which is the same as the type of the control predecessor, with the TRF pushed to the end of the control type's factor list. Conceptually, for every thread that comes in to a fork node, TRF threads come out. A fork node can drive any number of children [thread\_id](#threadid) nodes. Each fork must have a single corresponding [join](#join) node - the fork must dominate the join node, and the join node must post-dominate the fork node (in the control flow subgraph). #### Join Join (and [fork](#fork)) nodes are the mechanism for synchronizing data-parallel threads inside Hercules IR. A join nodes takes one input - a control predecessor. 
The output type of a join node is a control type, which is the same as the type of the control predecessor, with the last factor in the control type's list removed. Conceptually, after all threads created by the corresponding fork reach the join, then and only then does the join output a single thread. A join node can drive any number of children [collect](#collect) nodes. Each join must have a single corresponding [fork](#fork) node - the join must post-dominate the fork node, and the fork node must dominate the join node (in the control flow subgraph). +#### Phi + +Phi nodes merge potentially many data sources into one data output, driven by a corresponding region node. Phi nodes in Hercules IR perform the same function as phi nodes in other SSA-based IRs. Phi nodes take at least one input - a control predecessor, and some number of data inputs. The control predecessor of a phi node must be a region node. The data inputs must all have the same type. The output of the phi node has that data type. In the sea of nodes execution model, a phi node can be thought of as "latching" when its corresponding region node is reached. The phi node will latch to output the value of the input corresponding to the input that control traversed to reach the region node. After latching, the phi node's output won't change until the region node is reached again. + +#### ThreadID + +The thread\_id node provides the thread ID as a datum to children nodes after a [fork](#fork) has been performed. A thread\_id node takes one input - a control predecessor. The control predecessor must be a [fork](#fork) node. The output type is a 64-bit unsigned integer. The output thread IDs generated by a thread\_id node range from 0 to TRF - 1, inclusive, where TRF is the thread replication factor of the input [fork](#fork) node. + +#### Collect + +The collect node collects data from multiple executing threads, and puts them all into an array. 
A collect node takes two inputs - a control predecessor, and a data input. The control predecessor must be a [join](#join) node. The data input must have a non-control type. The output type will be an array, where the element type will be the type of the data input. The extent of the array will be equal to the thread replication factor of the [fork](#fork) node corresponding to the input [join](#join) node. For each datum input, the thread ID corresponding to that datum will be the index the datum is inserted into the array. + +#### Return + +The return node returns some data from the current function. A return node has two inputs - a control predecessor, and a data input. The control predecessor must have a control type with an empty factor list - just as only one thread starts the execution of a function, only one thread can return from a function. The data input must have the same type as the function's return type. No node should use a return node as input (technically, the output type of a return node is an empty product type). + +#### Parameter + +The parameter node represents a parameter of the function. A parameter node takes no inputs. A parameter node stores the parameter index of the function it corresponds to. Its value at runtime is the index-th argument to the function. Its output type is the type of the index-th parameter of the function. + +#### Constant + +The constant node represents a constant value. A constant node takes no inputs. A constant node stores the constant ID of the constant it corresponds to. Its value at runtime is the constant it references. Its output type is the type of the constant it references. + +#### DynamicConstant + +The dynamic\_constant node represents a dynamic constant, used as a runtime value. A dynamic\_constant node takes no inputs. A dynamic\_constant node stores the dynamic constant ID of the dynamic constant it corresponds to. 
Its value at runtime is the value of the dynamic constant it references, which is calculated at conductor time. Its output type is a 64-bit unsigned integer. + ### Optimizations Hercules relies on other compiler infrastructures, such as LLVM, to do code generation for specific devices. Thus, Hercules itself doesn't perform particularly sophisticated optimizations. In general, the optimizations Hercules do are done to make partitioning easier. This includes things like GVN and peephole optimizations, which in general, make the IR "simpler". -- GitLab From 3e9ea061393750c810efce50fe2b16c24585d065 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Wed, 4 Oct 2023 21:30:47 -0500 Subject: [PATCH 094/105] Move IR docs to separate markdown file --- DESIGN.md | 58 +---------------------------------------------- IR.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 57 deletions(-) create mode 100644 IR.md diff --git a/DESIGN.md b/DESIGN.md index 8f18d3a3..c34c3023 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -40,63 +40,7 @@ The key idea behind the sea of nodes IR is that control flow and data flow are r We chose to use a sea of nodes based IR because we believe it will be easier to partition than a CFG + basic block style IR. A CFG + basic block IR is inherently two-level - there is the control flow level in the CFG, and the data flow in the basic blocks. Partitioning a function across these two levels is a challenging task. As shown by previous work (HPVM), introducing more graph levels into the IR makes partitioning harder, not easier. We want Hercules to have fine-grained control over which code executes where. This requires Hercules' compiler IR to have as few graph levels as reasonable. -Hercules IR is structured as following: -- One entire program lives in one "Module". -- Each module contains a set of functions, as well as interned types, constants, and dynamic constants. 
The most important element of a module is its resident functions. -- Each function consists of a name, a set of types for its parameters, a return type, a list of nodes, and the number of dynamic constants it takes as argument. Types are not needed for dynamic constants, since all dynamic constants have type u64. The most important element of a function is its node list. -- There are control and data types. The control type is parameterized by a list of thread replication factors. The primitive data types are boolean, signed integers, unsigned integers, and floating point numbers. The integer types can hold 8, 16, 32, or 64 bits. The floating point types can hold 32 or 64 bits. The compound types are product, summation, and arrays. A product type is a tuple, containing some number of children data types. A summation type is a union, containing exactly one of some number of children data types at runtime. An array is a dynamically indexable collection of elements, where each element is the same type. The size of the array is part of the type, and is represented with a dynamic constant. -- Dynamic constants are constants provided to the conductor when a Hercules IR program is started. Through this mechanism, Hercules IR can represent programs operating on a variable number of array elements, while forbidding runtime dynamic memory allocation (all dynamic memory allocation happens in the conductor). -- The nodes in a function are structured as a flow graph, which an explicit start node. Although control and data flow from definitions to uses, def-use edges are stored implicitly in the IR. Each node stores its predecessor nodes, so use-def edges are stored explicitly. To query the def-use edges in an IR graph, use the `def_use` function. - -Below, all of the nodes in Hercules IR are described. - -#### Start - -The start node of the IR flow graph. This node is implicitly defined in the text format. It takes no inputs. 
Its output type is the empty control type (control with no thread replication factors). - -#### Region - -Region nodes are the mechanism for merging multiple branches inside Hercules IR. A region node takes at least one input - each input must have a control type, and all of the inputs must have the same control type. The output type of the region node is the same control type as all of its inputs. The main purpose of a region node is to drive some number of [phi](#phi) nodes. - -#### If - -The branch mechanism in Hercules IR. An if node takes two inputs - a control predecessor, and a condition. The control predecessor must have control type, and the condition must have boolean type. The output type is a product of two control types, which are the same as the control input's type. Every if node must be followed directly by two [read\_prod](#readprod) nodes, each of which reads differing elements of the if node's output product. This is the mechanism by which the output edges from the if node (and also the [match](#match) node) are labelled, even though nodes only explicitly store their input edges. - -#### Fork - -Fork (and [join](#join)) nodes are the mechanism for representing data-parallelism inside Hercules IR. A fork node takes one input - a control predecessor. A fork node also stores a thread replication factor (TRF), represented as a dynamic constant. The output type of a fork node is a control type, which is the same as the type of the control predecessor, with the TRF pushed to the end of the control type's factor list. Conceptually, for every thread that comes in to a fork node, TRF threads come out. A fork node can drive any number of children [thread\_id](#threadid) nodes. Each fork must have a single corresponding [join](#join) node - the fork must dominate the join node, and the join node must post-dominate the fork node (in the control flow subgraph). 
- -#### Join - -Join (and [fork](#fork)) nodes are the mechanism for synchronizing data-parallel threads inside Hercules IR. A join nodes takes one input - a control predecessor. The output type of a join node is a control type, which is the same as the type of the control predecessor, with the last factor in the control type's list removed. Conceptually, after all threads created by the corresponding fork reach the join, then and only then does the join output a single thread. A join node can drive any number of children [collect](#collect) nodes. Each join must have a single corresponding [fork](#fork) node - the join must post-dominate the fork node, and the fork node must dominate the join node (in the control flow subgraph). - -#### Phi - -Phi nodes merge potentially many data sources into one data output, driven by a corresponding region node. Phi nodes in Hercules IR perform the same function as phi nodes in other SSA-based IRs. Phi nodes take at least one input - a control predecessor, and some number of data inputs. The control predecessor of a phi node must be a region node. The data inputs must all have the same type. The output of the phi node has that data type. In the sea of nodes execution model, a phi node can be thought of as "latching" when its corresponding region node is reached. The phi node will latch to output the value of the input corresponding to the input that control traversed to reach the region node. After latching, the phi node's output won't change until the region node is reached again. - -#### ThreadID - -The thread\_id node provides the thread ID as a datum to children nodes after a [fork](#fork) has been performed. A thread\_id node takes one input - a control predecessor. The control predecessor must be a [fork](#fork) node. The output type is a 64-bit unsigned integer. 
The output thread IDs generated by a thread\_id node range from 0 to TRF - 1, inclusive, where TRF is the thread replication factor of the input [fork](#fork) node. - -#### Collect - -The collect node collects data from multiple executing threads, and puts them all into an array. A collect node takes two inputs - a control predecessor, and a data input. The control predecessor must be a [join](#join) node. The data input must have a non-control type. The output type will be an array, where the element type will be the type of the data input. The extent of the array will be equal to the thread replication factor of the [fork](#fork) node corresponding to the input [join](#join) node. For each datum input, the thread ID corresponding to that datum will be the index the datum is inserted into the array. - -#### Return - -The return node returns some data from the current function. A return node has two inputs - a control predecessor, and a data input. The control predecessor must have a control type with an empty factor list - just as only one thread starts the execution of a function, only one thread can return from a function. The data input must have the same type as the function's return type. No node should use a return node as input (technically, the output type of a return node is an empty product type). - -#### Parameter - -The parameter node represents a parameter of the function. A parameter node takes no inputs. A parameter node stores the parameter index of the function it corresponds to. Its value at runtime is the index-th argument to the function. Its output type is the type of the index-th parameter of the function. - -#### Constant - -The constant node represents a constant value. A constant node takes no inputs. A constant node stores the constant ID of the constant it corresponds to. Its value at runtime is the constant it references. Its output type is the type of the constant it references. 
- -#### DynamicConstant - -The dynamic\_constant node represents a dynamic constant, used as a runtime value. A dynamic\_constant node takes no inputs. A dynamic\_constant node stores the dynamic constant ID of the dynamic constant it corresponds to. Its value at runtime is the value of the dynamic constant it references, which is calculated at conductor time. Its output type is a 64-bit unsigned integer. +See [IR.md](IR.md) for a more specific description of Hercules IR. ### Optimizations diff --git a/IR.md b/IR.md new file mode 100644 index 00000000..e93f9517 --- /dev/null +++ b/IR.md @@ -0,0 +1,67 @@ +# Hercules IR + +Hercules IR is structured as following: +- One entire program lives in one "Module". +- Each module contains a set of functions, as well as interned types, constants, and dynamic constants. The most important element of a module is its resident functions. +- Each function consists of a name, a set of types for its parameters, a return type, a list of nodes, and the number of dynamic constants it takes as argument. Types are not needed for dynamic constants, since all dynamic constants have type u64. The most important element of a function is its node list. +- There are control and data types. The control type is parameterized by a list of thread replication factors. The primitive data types are boolean, signed integers, unsigned integers, and floating point numbers. The integer types can hold 8, 16, 32, or 64 bits. The floating point types can hold 32 or 64 bits. The compound types are product, summation, and arrays. A product type is a tuple, containing some number of children data types. A summation type is a union, containing exactly one of some number of children data types at runtime. An array is a dynamically indexable collection of elements, where each element is the same type. The size of the array is part of the type, and is represented with a dynamic constant. 
+- Dynamic constants are constants provided to the conductor when a Hercules IR program is started. Through this mechanism, Hercules IR can represent programs operating on a variable number of array elements, while forbidding runtime dynamic memory allocation (all dynamic memory allocation happens in the conductor). +- The nodes in a function are structured as a flow graph, with an explicit start node. Although control and data flow from definitions to uses, def-use edges are stored implicitly in the IR. Each node stores its predecessor nodes, so use-def edges are stored explicitly. To query the def-use edges in an IR graph, use the `def_use` function. + +Below, all of the nodes in Hercules IR are described. + +## Start + +The start node of the IR flow graph. This node is implicitly defined in the text format. It takes no inputs. Its output type is the empty control type (control with no thread replication factors). + +## Region + +Region nodes are the mechanism for merging multiple branches inside Hercules IR. A region node takes at least one input - each input must have a control type, and all of the inputs must have the same control type. The output type of the region node is the same control type as all of its inputs. The main purpose of a region node is to drive some number of [phi](#phi) nodes. + +## If + +The branch mechanism in Hercules IR. An if node takes two inputs - a control predecessor, and a condition. The control predecessor must have control type, and the condition must have boolean type. The output type is a product of two control types, which are the same as the control input's type. Every if node must be followed directly by two [read\_prod](#readprod) nodes, each of which reads differing elements of the if node's output product. This is the mechanism by which the output edges from the if node (and also the [match](#match) node) are labelled, even though nodes only explicitly store their input edges.
+ +## Fork + +Fork (and [join](#join)) nodes are the mechanism for representing data-parallelism inside Hercules IR. A fork node takes one input - a control predecessor. A fork node also stores a thread replication factor (TRF), represented as a dynamic constant. The output type of a fork node is a control type, which is the same as the type of the control predecessor, with the TRF pushed to the end of the control type's factor list. Conceptually, for every thread that comes into a fork node, TRF threads come out. A fork node can drive any number of children [thread\_id](#threadid) nodes. Each fork must have a single corresponding [join](#join) node - the fork must dominate the join node, and the join node must post-dominate the fork node (in the control flow subgraph). + +## Join + +Join (and [fork](#fork)) nodes are the mechanism for synchronizing data-parallel threads inside Hercules IR. A join node takes one input - a control predecessor. The output type of a join node is a control type, which is the same as the type of the control predecessor, with the last factor in the control type's list removed. Conceptually, after all threads created by the corresponding fork reach the join, then and only then does the join output a single thread. A join node can drive any number of children [collect](#collect) nodes. Each join must have a single corresponding [fork](#fork) node - the join must post-dominate the fork node, and the fork node must dominate the join node (in the control flow subgraph). + +## Phi + +Phi nodes merge potentially many data sources into one data output, driven by a corresponding region node. Phi nodes in Hercules IR perform the same function as phi nodes in other SSA-based IRs. Phi nodes take at least one input - a control predecessor, and some number of data inputs. The control predecessor of a phi node must be a region node. The data inputs must all have the same type. The output of the phi node has that data type.
In the sea of nodes execution model, a phi node can be thought of as "latching" when its corresponding region node is reached. The phi node will latch to output the value of the input corresponding to the input that control traversed to reach the region node. After latching, the phi node's output won't change until the region node is reached again. + +## ThreadID + +The thread\_id node provides the thread ID as a datum to children nodes after a [fork](#fork) has been performed. A thread\_id node takes one input - a control predecessor. The control predecessor must be a [fork](#fork) node. The output type is a 64-bit unsigned integer. The output thread IDs generated by a thread\_id node range from 0 to TRF - 1, inclusive, where TRF is the thread replication factor of the input [fork](#fork) node. + +## Collect + +The collect node collects data from multiple executing threads, and puts them all into an array. A collect node takes two inputs - a control predecessor, and a data input. The control predecessor must be a [join](#join) node. The data input must have a non-control type. The output type will be an array, where the element type will be the type of the data input. The extent of the array will be equal to the thread replication factor of the [fork](#fork) node corresponding to the input [join](#join) node. For each datum input, the thread ID corresponding to that datum will be the index the datum is inserted into the array. + +## Return + +The return node returns some data from the current function. A return node has two inputs - a control predecessor, and a data input. The control predecessor must have a control type with an empty factor list - just as only one thread starts the execution of a function, only one thread can return from a function. The data input must have the same type as the function's return type. No node should use a return node as input (technically, the output type of a return node is an empty product type). 
+ +## Parameter + +The parameter node represents a parameter of the function. A parameter node takes no inputs. A parameter node stores the parameter index of the function it corresponds to. Its value at runtime is the index-th argument to the function. Its output type is the type of the index-th parameter of the function. + +## Constant + +The constant node represents a constant value. A constant node takes no inputs. A constant node stores the constant ID of the constant it corresponds to. Its value at runtime is the constant it references. Its output type is the type of the constant it references. + +## DynamicConstant + +The dynamic\_constant node represents a dynamic constant, used as a runtime value. A dynamic\_constant node takes no inputs. A dynamic\_constant node stores the dynamic constant ID of the dynamic constant it corresponds to. Its value at runtime is the value of the dynamic constant it references, which is calculated at conductor time. Its output type is a 64-bit unsigned integer. + +## Unary + +The unary node represents a basic unary operation. A unary node takes one input - a data input. The data input must have a non-control type. A unary node additionally stores which unary operation it performs. The output type of the unary node is the same as its input type. The acceptable input data type depends on the unary operation. + +## Binary + +The binary node represents a basic binary operation. A binary node takes two inputs - a left data input, and a right data input. The left and right data inputs must be the same non-control type. A binary node additionally stores the binary operation it performs. The output type of the binary node is the same as its input type. The acceptable input data type depends on the binary operation. 
-- GitLab From 9ce068fa5e735924daad47d4b9d974b98215c765 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 5 Oct 2023 10:37:06 -0500 Subject: [PATCH 095/105] Doc more nodes --- IR.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/IR.md b/IR.md index e93f9517..cda60a6e 100644 --- a/IR.md +++ b/IR.md @@ -65,3 +65,23 @@ The unary node represents a basic unary operation. A unary node takes one input ## Binary The binary node represents a basic binary operation. A binary node takes two inputs - a left data input, and a right data input. The left and right data inputs must be the same non-control type. A binary node additionally stores the binary operation it performs. The output type of the binary node is the same as its input type. The acceptable input data type depends on the binary operation. + +## Call + +The call node passes its inputs to a function, and outputs the result of the function call. A call node takes some number of data inputs. A call node also stores a reference to the function it calls. The number and types of the data inputs must match the referenced function. A call node also stores references to dynamic constants it uses as inputs to the function. The number of dynamic constant references must match the number of dynamic constant inputs of the referenced function. The output type of a call node is the return type of the referenced function. A call node notably does not take as input or output a control type. This is because all operations in Hercules IR are pure, including arbitrary function calls. Thus, the only things affecting a function call are the data inputs, and (conceptually) the function may be called an arbitrary amount of times. + +## ReadProd + +The read\_prod node reads an element from a product typed value. A read\_prod node takes one data input. A read\_prod node also stores the index into the product it reads. The type of the data input must be a product type.
The index must be a valid index into the product type. The output type of a read\_prod node is the type of the index-th element in the product (0-indexed). + +## WriteProd + +The write\_prod node modifies an input product with an input datum, and outputs the new product. A write\_prod node takes two inputs - one product input, and one data input. A write\_prod node also stores the index into the product it writes. The type of the product input must be a product type. The type of the data input must be the same as the index-th element in the product (0-indexed). The output type of a write\_prod node is the same as the product input type. + +## ReadArray + +The read\_array node reads an element from an array typed value. A read\_array node takes two inputs - one array input, and one index input. The type of the array input must be an array type. The type of the index input must be an integer type. The output type of a read\_array node is the element type of the array input's array type. At runtime, if an out-of-bounds array access occurs, the conductor will eventually notify the host. + +## WriteArray + +The write\_array node modifies an input array with an input datum. A write\_array node takes three inputs - one array input, one data input, and one index input. The type of the array input must be an array type. The type of the data input must be the same as the element type of the array input's array type. The type of the index input must be an integer type. The output type of a write\_array node is the same as the array input's array type. At runtime, if an out-of-bounds array access occurs, the conductor will eventually notify the host. 
-- GitLab From 5939bca6da52f339ba90fac41d5493b6bf7934db Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Thu, 5 Oct 2023 10:45:45 -0500 Subject: [PATCH 096/105] Finish documenting nodes --- IR.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/IR.md b/IR.md index cda60a6e..57c63320 100644 --- a/IR.md +++ b/IR.md @@ -85,3 +85,15 @@ The read\_array node reads an element from an array typed value. A read\_array n ## WriteArray The write\_array node modifies an input array with an input datum. A write\_array node takes three inputs - one array input, one data input, and one index input. The type of the array input must be an array type. The type of the data input must be the same as the element type of the array input's array type. The type of the index input must be an integer type. The output type of a write\_array node is the same as the array input's array type. At runtime, if an out-of-bounds array access occurs, the conductor will eventually notify the host. + +## Match + +The match node branches based on the variant of a sum typed value. A match node takes two inputs - a control predecessor, and a sum input. The control predecessor must have control type, and the sum input must have a sum type. The output type is a product of N control types, where N is the number of possible variants in the sum input's sum type. The control types in the product are the same as the control input's type. Every match node must be followed directly by N [read\_prod](#readprod) nodes, each of which reads differing elements of the match node's output product. This is the mechanism by which the output edges from the match node (and also the [if](#if) node) are labelled, even though nodes only explicitly store their input edges. + +## BuildSum + +The build\_sum node creates a sum typed value from a datum. A build\_sum node takes one input - a data input. 
A build\_sum node additionally stores the sum type it builds, as well as which variant of the aforementioned sum type it builds. The stored variant must be a valid variant inside the stored sum type. The type of the data input must match the type of the variant of the sum type. The output type of a build\_sum node is the aforementioned sum type. + +## ExtractSum + +The extract\_sum node extracts the concrete value inside a sum value, given a particular variant to extract. An extract\_sum node takes one input - a data input. The data input must have a sum type. An extract\_sum node also stores the variant it extracts. The stored variant must be a valid variant of the data input's sum type. The output type of an extract\_sum node is the type of the specified variant of the data input's sum type. At runtime, if the input sum value holds the stored variant, the output of an extract\_sum node is the value inside that variant in the sum value. If the input sum value holds a different variant, the output of an extract\_sum node is defined as the bit-pattern of all zeros for the output type of the extract\_sum node. -- GitLab From 00576c399bcafa5a126f636f121a353246ca7c33 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 19:11:50 -0500 Subject: [PATCH 097/105] Control input dataflow analysis --- hercules_ir/src/dataflow.rs | 134 ++++++++++++++++++++++++++++++++++- hercules_ir/src/ir.rs | 74 +++++++++++++++++-- hercules_ir/src/typecheck.rs | 4 +- 3 files changed, 202 insertions(+), 10 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 69a00b65..95f32c1e 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -8,7 +8,7 @@ use crate::*; /* * Trait for a type that is a semilattice. Semilattice types must also be Eq, * so that the dataflow analysis can determine when to terminate.
*/ -pub trait Semilattice: Eq { +pub trait Semilattice: Eq + Clone { fn meet(a: &Self, b: &Self) -> Self; fn bottom() -> Self; fn top() -> Self; @@ -37,10 +37,18 @@ where let uses: Vec<NodeUses> = function.nodes.iter().map(|n| get_uses(n)).collect(); // Step 2: create initial set of "out" points. + let start_node_output = flow_function(&[&L::bottom()], NodeID::new(0)); let mut outs: Vec<L> = (0..function.nodes.len()) .map(|id| { flow_function( - &vec![&(if id == 0 { L::bottom() } else { L::top() }); uses[id].as_ref().len()], + &vec![ + &(if id == 0 { + start_node_output.clone() + } else { + L::top() + }); + uses[id].as_ref().len() + ], NodeID::new(id), ) }) @@ -124,3 +132,125 @@ fn reverse_postorder_helper( (order, visited) } } + +/* + * A bit vector set is a very general kind of semilattice. This variant is for + * "intersecting" flow functions. + */ +#[derive(PartialEq, Eq, Clone)] +pub enum IntersectNodeSet { + Empty, + Bits(BitVec<u8, Lsb0>), + Full, +} + +impl Semilattice for IntersectNodeSet { + fn meet(a: &Self, b: &Self) -> Self { + match (a, b) { + (IntersectNodeSet::Full, b) => b.clone(), + (a, IntersectNodeSet::Full) => a.clone(), + (IntersectNodeSet::Bits(a), IntersectNodeSet::Bits(b)) => { + assert!( + a.len() == b.len(), + "IntersectNodeSets must have same length to meet." + ); + IntersectNodeSet::Bits(a.clone() | b) + } + (IntersectNodeSet::Empty, _) => IntersectNodeSet::Empty, + (_, IntersectNodeSet::Empty) => IntersectNodeSet::Empty, + } + } + + fn bottom() -> Self { + // For intersecting flow functions, the bottom state is empty. + IntersectNodeSet::Empty + } + + fn top() -> Self { + // For intersecting flow functions, the bottom state is full. + IntersectNodeSet::Full + } +} + +/* + * A bit vector set is a very general kind of semilattice. This variant is for + * "unioning" flow functions. 
+ */ +#[derive(PartialEq, Eq, Clone)] +pub enum UnionNodeSet { + Empty, + Bits(BitVec<u8, Lsb0>), + Full, +} + +impl Semilattice for UnionNodeSet { + fn meet(a: &Self, b: &Self) -> Self { + match (a, b) { + (UnionNodeSet::Full, b) => b.clone(), + (a, UnionNodeSet::Full) => a.clone(), + (UnionNodeSet::Bits(a), UnionNodeSet::Bits(b)) => { + assert!( + a.len() == b.len(), + "UnionNodeSets must have same length to meet." + ); + UnionNodeSet::Bits(a.clone() | b) + } + (UnionNodeSet::Empty, _) => UnionNodeSet::Empty, + (_, UnionNodeSet::Empty) => UnionNodeSet::Empty, + } + } + + fn bottom() -> Self { + // For unioning flow functions, the bottom state is full. + UnionNodeSet::Full + } + + fn top() -> Self { + // For unioning flow functions, the bottom state is empty. + UnionNodeSet::Empty + } +} + +/* + * Below are some common flow functions. They all take a slice of semilattice + * references as their first argument, and a node ID as their second. However, + * they may in addition take more arguments (meaning that these functions + * should be used inside closures at a callsite of a top level dataflow + * function). + */ + +/* + * Flow function for collecting all of a node's uses of "control outputs". What + * this flow function does is collect all phi, thread ID, and collect nodes that + * every other node depends on through data nodes. In other words, dependence + * on these three kinds of nodes can flow through data, but not through control + * nodes. Since forward_dataflow returns the out sets, to get the phi, thread + * ID, and collect nodes that a particular control node depends on, one should + * look at the out set of the data input that that control node depends on. + */ +pub fn control_output_flow( + inputs: &[&UnionNodeSet], + node_id: NodeID, + function: &Function, +) -> UnionNodeSet { + // Step 1: union inputs. 
+ let mut out = UnionNodeSet::top(); + for input in inputs { + out = UnionNodeSet::meet(&out, input); + } + + // Step 2: set bit for current node, if applicable. + let node = &function.nodes[node_id.idx()]; + if node.is_phi() || node.is_thread_id() || node.is_collect() { + let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()]; + singular.set(node_id.idx(), true); + out = UnionNodeSet::meet(&out, &UnionNodeSet::Bits(singular)); + } + + // Step 3: clear all bits if control node. + if node.is_strictly_control() { + out = UnionNodeSet::Empty; + } + + out +} diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 9400f026..534436cf 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -285,17 +285,77 @@ pub enum BinaryOperator { RSh, } +/* + * Simple predicate functions on nodes take a lot of space, so use a macro. + */ + +macro_rules! define_pattern_predicate { + ($x: ident, $y: pat) => { + pub fn $x(&self) -> bool { + if let $y = self { + true + } else { + false + } + } + }; +} + impl Node { - pub fn is_return(&self) -> bool { - if let Node::Return { + define_pattern_predicate!(is_start, Node::Start); + define_pattern_predicate!(is_region, Node::Region { preds: _ }); + define_pattern_predicate!( + is_if, + Node::If { + control: _, + cond: _, + } + ); + define_pattern_predicate!( + is_fork, + Node::Fork { + control: _, + factor: _, + } + ); + define_pattern_predicate!(is_join, Node::Join { control: _ }); + define_pattern_predicate!( + is_phi, + Node::Phi { + control: _, + data: _, + } + ); + define_pattern_predicate!(is_thread_id, Node::ThreadID { control: _ }); + define_pattern_predicate!( + is_collect, + Node::Collect { + control: _, + data: _, + } + ); + define_pattern_predicate!( + is_return, + Node::Return { control: _, data: _, - } = self - { - true - } else { - false } + ); + define_pattern_predicate!(is_match, Node::Match { control: _, sum: _ }); + + /* + * ReadProd nodes can be considered control when following an if or match + * 
node. However, it is sometimes useful to exclude such nodes when + * considering control nodes. + */ + pub fn is_strictly_control(&self) -> bool { + self.is_start() + || self.is_region() + || self.is_if() + || self.is_fork() + || self.is_join() + || self.is_return() + || self.is_return() } pub fn upper_case_name(&self) -> &'static str { diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 6722a66d..12e42924 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -25,6 +25,9 @@ impl TypeSemilattice { } } +/* Define custom PartialEq, so that dataflow will terminate right away if there + * are errors. + */ impl PartialEq for TypeSemilattice { fn eq(&self, other: &Self) -> bool { match (self, other) { @@ -39,7 +42,6 @@ impl PartialEq for TypeSemilattice { impl Semilattice for TypeSemilattice { fn meet(a: &Self, b: &Self) -> Self { match (a, b) { - (Unconstrained, Unconstrained) => Unconstrained, (Unconstrained, b) => b.clone(), (a, Unconstrained) => a.clone(), (Concrete(id1), Concrete(id2)) => { -- GitLab From 108ed866da596258191ccba510335330998fe096 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 19:55:20 -0500 Subject: [PATCH 098/105] Verify that uses of phis are dominated by corresponding region --- hercules_ir/src/dataflow.rs | 34 ++++++++++++--- hercules_ir/src/dom.rs | 8 ++++ hercules_ir/src/typecheck.rs | 2 +- hercules_ir/src/verify.rs | 82 +++++++++++++++++++++++++++++++++--- 4 files changed, 113 insertions(+), 13 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 95f32c1e..754285ac 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -137,13 +137,23 @@ fn reverse_postorder_helper( * A bit vector set is a very general kind of semilattice. This variant is for * "intersecting" flow functions. 
*/ -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Debug)] pub enum IntersectNodeSet { Empty, Bits(BitVec<u8, Lsb0>), Full, } +impl IntersectNodeSet { + pub fn is_set(&self, id: NodeID) -> bool { + match self { + IntersectNodeSet::Empty => false, + IntersectNodeSet::Bits(bits) => bits[id.idx()], + IntersectNodeSet::Full => true, + } + } +} + impl Semilattice for IntersectNodeSet { fn meet(a: &Self, b: &Self) -> Self { match (a, b) { @@ -154,7 +164,7 @@ impl Semilattice for IntersectNodeSet { a.len() == b.len(), "IntersectNodeSets must have same length to meet." ); - IntersectNodeSet::Bits(a.clone() | b) + IntersectNodeSet::Bits(a.clone() & b) } (IntersectNodeSet::Empty, _) => IntersectNodeSet::Empty, (_, IntersectNodeSet::Empty) => IntersectNodeSet::Empty, @@ -176,18 +186,28 @@ impl Semilattice for IntersectNodeSet { * A bit vector set is a very general kind of semilattice. This variant is for * "unioning" flow functions. */ -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Debug)] pub enum UnionNodeSet { Empty, Bits(BitVec<u8, Lsb0>), Full, } +impl UnionNodeSet { + pub fn is_set(&self, id: NodeID) -> bool { + match self { + UnionNodeSet::Empty => false, + UnionNodeSet::Bits(bits) => bits[id.idx()], + UnionNodeSet::Full => true, + } + } +} + impl Semilattice for UnionNodeSet { fn meet(a: &Self, b: &Self) -> Self { match (a, b) { - (UnionNodeSet::Full, b) => b.clone(), - (a, UnionNodeSet::Full) => a.clone(), + (UnionNodeSet::Empty, b) => b.clone(), + (a, UnionNodeSet::Empty) => a.clone(), (UnionNodeSet::Bits(a), UnionNodeSet::Bits(b)) => { assert!( a.len() == b.len(), @@ -195,8 +215,8 @@ impl Semilattice for UnionNodeSet { ); UnionNodeSet::Bits(a.clone() | b) } - (UnionNodeSet::Empty, _) => UnionNodeSet::Empty, - (_, UnionNodeSet::Empty) => UnionNodeSet::Empty, + (UnionNodeSet::Full, _) => UnionNodeSet::Full, + (_, UnionNodeSet::Full) => UnionNodeSet::Full, } } diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index 
002cbd04..d359db65 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -38,6 +38,14 @@ impl DomTree { pub fn does_prop_dom(&self, a: NodeID, b: NodeID) -> bool { a != b && self.does_dom(a, b) } + + /* + * Check if a node is in the dom tree (if the node is the root of the tree, + * will still return true). + */ + pub fn is_non_root(&self, x: NodeID) -> bool { + self.idom.contains_key(&x) + } } /* diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index 12e42924..b6f2cf19 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -8,7 +8,7 @@ use self::TypeSemilattice::*; /* * Enum for type semilattice. */ -#[derive(Eq, Clone)] +#[derive(Eq, Clone, Debug)] enum TypeSemilattice { Unconstrained, Concrete(TypeID), diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 89192c01..45a0453f 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -33,19 +33,25 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { } // Check SSA, fork, and join dominance relations. 
- for (function, def_use) in zip(module.functions.iter(), def_uses) { - let subgraph = control_subgraph(function, &def_use); + for (function, (def_use, reverse_postorder)) in zip( + module.functions.iter(), + zip(def_uses.iter(), reverse_postorders.iter()), + ) { + let control_output_dependencies = + forward_dataflow(function, reverse_postorder, |inputs, id| { + control_output_flow(inputs, id, function) + }); + let subgraph = control_subgraph(function, def_use); let dom = dominator(&subgraph, NodeID::new(0)); let postdom = postdominator(subgraph, NodeID::new(function.nodes.len())); - println!("{:?}", dom); - println!("{:?}", postdom); + verify_dominance_relationships(function, &control_output_dependencies, &dom, &postdom)?; } Ok(typing) } /* - * There are structural constraints the IR must follow, such as all Phi nodes' + * There are structural constraints the IR must follow, such as all phi nodes' * control input must be a region node. This is where those properties are * verified. */ @@ -161,5 +167,71 @@ fn verify_structure( _ => {} }; } + + Ok(()) +} + +/* + * There are dominance relationships the IR must follow, such as all uses of a + * phi node must be dominated by the corresponding region node. + */ +fn verify_dominance_relationships( + function: &Function, + control_output_dependencies: &Vec<UnionNodeSet>, + dom: &DomTree, + postdom: &DomTree, +) -> Result<(), String> { + for idx in 0..function.nodes.len() { + let dependencies = &control_output_dependencies[idx]; + for other_idx in 0..function.nodes.len() { + if dependencies.is_set(NodeID::new(other_idx)) { + match function.nodes[other_idx] { + Node::Phi { control, data: _ } => { + // If the current node is a control node and the phi's + // region doesn't dominate it, then the phi doesn't + // dominate its use. 
+ if dom.is_non_root(NodeID::new(idx)) + && !dom.does_dom(control, NodeID::new(idx)) + { + Err(format!( + "Phi node (ID {}) doesn't dominate its use (ID {}).", + other_idx, idx + ))?; + } + + // If the current node is a phi or collect node whose + // corresponding region or join node isn't dominated by + // the other phi node, then the other phi doesn't + // dominate its use. + if let Node::Phi { + control: dominated_control, + data: _, + } = function.nodes[idx] + { + if !dom.does_dom(control, dominated_control) { + Err(format!( + "Phi node (ID {}) doesn't dominate its use (ID {}).", + other_idx, idx + ))?; + } + } else if let Node::Collect { + control: dominated_control, + data: _, + } = function.nodes[idx] + { + if !dom.does_dom(control, dominated_control) { + Err(format!( + "Phi node (ID {}) doesn't dominate its use (ID {}).", + other_idx, idx + ))?; + } + } + } + _ => {} + } + } + } + } + Ok(()) } -- GitLab From f0d332596764b9ebf15cba39deff3ef15b59ab50 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 19:57:23 -0500 Subject: [PATCH 099/105] Add comment --- hercules_ir/src/verify.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 45a0453f..f64f09e6 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -202,7 +202,12 @@ fn verify_dominance_relationships( // If the current node is a phi or collect node whose // corresponding region or join node isn't dominated by // the other phi node, then the other phi doesn't - // dominate its use. + // dominate its use. We don't need to do something + // similar for thread ID nodes, since they have no data + // input. In fact, it's impossible to reach this point + // in control as a thread ID node, since it can't + // possibly depend on a phi, thread ID, or collect node + // in the first place. 
if let Node::Phi { control: dominated_control, data: _, -- GitLab From 95854363b50089c12ce0c6e87c7184ac1f48d690 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 20:37:13 -0500 Subject: [PATCH 100/105] Refactor --- hercules_ir/src/verify.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index f64f09e6..907e27d6 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -186,7 +186,7 @@ fn verify_dominance_relationships( for other_idx in 0..function.nodes.len() { if dependencies.is_set(NodeID::new(other_idx)) { match function.nodes[other_idx] { - Node::Phi { control, data: _ } => { + Node::Phi { control, data: _ } | Node::Collect { control, data: _ } => { // If the current node is a control node and the phi's // region doesn't dominate it, then the phi doesn't // dominate its use. @@ -194,8 +194,10 @@ fn verify_dominance_relationships( && !dom.does_dom(control, NodeID::new(idx)) { Err(format!( - "Phi node (ID {}) doesn't dominate its use (ID {}).", - other_idx, idx + "{} node (ID {}) doesn't dominate its use (ID {}).", + function.nodes[other_idx].upper_case_name(), + other_idx, + idx ))?; } @@ -211,23 +213,18 @@ fn verify_dominance_relationships( if let Node::Phi { control: dominated_control, data: _, - } = function.nodes[idx] - { - if !dom.does_dom(control, dominated_control) { - Err(format!( - "Phi node (ID {}) doesn't dominate its use (ID {}).", - other_idx, idx - ))?; - } - } else if let Node::Collect { + } + | Node::Collect { control: dominated_control, data: _, } = function.nodes[idx] { if !dom.does_dom(control, dominated_control) { Err(format!( - "Phi node (ID {}) doesn't dominate its use (ID {}).", - other_idx, idx + "{} node (ID {}) doesn't dominate its use (ID {}).", + function.nodes[other_idx].upper_case_name(), + other_idx, + idx ))?; } } -- GitLab From 4cf90022b81c20c15aa8656b2f65b0b0a5806563 Mon Sep 
17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 21:03:00 -0500 Subject: [PATCH 101/105] Make control type explicitly store fork nodes, rather than just factors --- hercules_ir/src/ir.rs | 2 +- hercules_ir/src/parse.rs | 5 ++++- hercules_ir/src/typecheck.rs | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 534436cf..dee8fb9e 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -46,7 +46,7 @@ pub struct Function { */ #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Type { - Control(Box<[DynamicConstantID]>), + Control(Box<[NodeID]>), Boolean, Integer8, Integer16, diff --git a/hercules_ir/src/parse.rs b/hercules_ir/src/parse.rs index fcabe771..8b666be0 100644 --- a/hercules_ir/src/parse.rs +++ b/hercules_ir/src/parse.rs @@ -638,7 +638,10 @@ fn parse_type<'a>(ir_text: &'a str, context: &RefCell<Context<'a>>) -> nom::IRes nom::character::complete::char(','), nom::character::complete::multispace0, )), - |x| parse_dynamic_constant_id(x, context), + |x| { + let (ir_text, node) = parse_identifier(x)?; + Ok((ir_text, context.borrow_mut().get_node_id(node))) + }, ), nom::character::complete::multispace0, nom::character::complete::char(')'), diff --git a/hercules_ir/src/typecheck.rs b/hercules_ir/src/typecheck.rs index b6f2cf19..137f4411 100644 --- a/hercules_ir/src/typecheck.rs +++ b/hercules_ir/src/typecheck.rs @@ -250,7 +250,10 @@ fn typeflow( inputs[0].clone() } - Node::Fork { control: _, factor } => { + Node::Fork { + control: _, + factor: _, + } => { if inputs.len() != 1 { return Error(String::from("Fork node must have exactly one input.")); } @@ -259,7 +262,7 @@ fn typeflow( if let Type::Control(factors) = &types[id.idx()] { // Fork adds a new factor to the thread spawn factor list. 
let mut new_factors = factors.clone().into_vec(); - new_factors.push(*factor); + new_factors.push(node_id); // Out type is control type, with the new thread spawn // factor. @@ -293,7 +296,14 @@ fn typeflow( return Error(String::from("Join node's first input must have a control type with at least one thread replication factor.")); } let mut new_factors = factors.clone().into_vec(); - join_factor_map.insert(node_id, new_factors.pop().unwrap()); + let factor = if let Node::Fork { control: _, factor } = + function.nodes[new_factors.pop().unwrap().idx()] + { + factor + } else { + panic!("Node ID in factor list doesn't correspond with a fork node."); + }; + join_factor_map.insert(node_id, factor); // Out type is the new control type. let control_out_id = get_type_id( -- GitLab From 165ed1533ceb4983695d5f7fb21d1870429695a1 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 21:21:56 -0500 Subject: [PATCH 102/105] Verify fork/join dominate/postdominate each other --- hercules_ir/src/verify.rs | 44 ++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 907e27d6..1b330dd4 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -33,8 +33,8 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { } // Check SSA, fork, and join dominance relations. 
- for (function, (def_use, reverse_postorder)) in zip( - module.functions.iter(), + for ((function, typing), (def_use, reverse_postorder)) in zip( + zip(module.functions.iter(), typing.iter()), zip(def_uses.iter(), reverse_postorders.iter()), ) { let control_output_dependencies = @@ -44,7 +44,14 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { let subgraph = control_subgraph(function, def_use); let dom = dominator(&subgraph, NodeID::new(0)); let postdom = postdominator(subgraph, NodeID::new(function.nodes.len())); - verify_dominance_relationships(function, &control_output_dependencies, &dom, &postdom)?; + verify_dominance_relationships( + function, + typing, + &module.types, + &control_output_dependencies, + &dom, + &postdom, + )?; } Ok(typing) @@ -177,15 +184,41 @@ fn verify_structure( */ fn verify_dominance_relationships( function: &Function, + typing: &Vec<TypeID>, + types: &Vec<Type>, control_output_dependencies: &Vec<UnionNodeSet>, dom: &DomTree, postdom: &DomTree, ) -> Result<(), String> { for idx in 0..function.nodes.len() { + match function.nodes[idx] { + // Verify that joins are dominated by their corresponding + // forks. + Node::Join { control } => { + // Check type of control predecessor. The last node ID + // in the factor list is the corresponding fork node ID. 
+ if let Type::Control(factors) = &types[typing[control.idx()].idx()] { + let join_id = NodeID::new(idx); + let fork_id = *factors.last().unwrap(); + if !dom.does_dom(fork_id, join_id) { + Err(format!("Fork node (ID {}) doesn't dominate its corresponding join node (ID {}).", fork_id.idx(), join_id.idx()))?; + } + if !postdom.does_dom(join_id, fork_id) { + Err(format!("Join node (ID {}) doesn't postdominate its corresponding fork node (ID {}).", join_id.idx(), fork_id.idx()))?; + } + } else { + panic!("Join node's control predecessor has a non-control type."); + } + } + _ => {} + } + let dependencies = &control_output_dependencies[idx]; for other_idx in 0..function.nodes.len() { if dependencies.is_set(NodeID::new(other_idx)) { match function.nodes[other_idx] { + // Verify that uses of phis / collect nodes are dominated + // the corresponding region / join nodes, respectively. Node::Phi { control, data: _ } | Node::Collect { control, data: _ } => { // If the current node is a control node and the phi's // region doesn't dominate it, then the phi doesn't @@ -206,10 +239,7 @@ fn verify_dominance_relationships( // the other phi node, then the other phi doesn't // dominate its use. We don't need to do something // similar for thread ID nodes, since they have no data - // input. In fact, it's impossible to reach this point - // in control as a thread ID node, since it can't - // possibly depend on a phi, thread ID, or collect node - // in the first place. + // input. 
if let Node::Phi { control: dominated_control, data: _, -- GitLab From db6668b036dd130d411df991a4302bef3aa2329c Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 21:57:51 -0500 Subject: [PATCH 103/105] Verify dominance relationships in IR --- hercules_ir/src/dataflow.rs | 25 +++--- hercules_ir/src/verify.rs | 103 +++++++++++++++--------- hercules_tools/src/hercules_dot/main.rs | 2 +- 3 files changed, 78 insertions(+), 52 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 754285ac..4bef9d00 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -241,12 +241,9 @@ impl Semilattice for UnionNodeSet { /* * Flow function for collecting all of a node's uses of "control outputs". What - * this flow function does is collect all phi, thread ID, and collect nodes that - * every other node depends on through data nodes. In other words, dependence - * on these three kinds of nodes can flow through data, but not through control - * nodes. Since forward_dataflow returns the out sets, to get the phi, thread - * ID, and collect nodes that a particular control node depends on, one should - * look at the out set of the data input that that control node depends on. + * this flow function does is collect all immediate phi, thread ID, and collect + * nodes that every other node depends on through data nodes. Flow is ended at + * a control node, or at a phi, thread ID, or collect node. */ pub fn control_output_flow( inputs: &[&UnionNodeSet], @@ -258,19 +255,19 @@ pub fn control_output_flow( for input in inputs { out = UnionNodeSet::meet(&out, input); } - - // Step 2: set bit for current node, if applicable. let node = &function.nodes[node_id.idx()]; - if node.is_phi() || node.is_thread_id() || node.is_collect() { + + // Step 2: clear all bits, if applicable. 
+ if node.is_strictly_control() || node.is_thread_id() || node.is_collect() || node.is_phi() { + out = UnionNodeSet::Empty; + } + + // Step 3: set bit for current node, if applicable. + if node.is_thread_id() || node.is_collect() || node.is_phi() { let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()]; singular.set(node_id.idx(), true); out = UnionNodeSet::meet(&out, &UnionNodeSet::Bits(singular)); } - // Step 3: clear all bits if control node. - if node.is_strictly_control() { - out = UnionNodeSet::Empty; - } - out } diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 1b330dd4..37533f6a 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -1,5 +1,6 @@ extern crate bitvec; +use std::collections::HashMap; use std::iter::zip; use verify::bitvec::prelude::*; @@ -190,10 +191,12 @@ fn verify_dominance_relationships( dom: &DomTree, postdom: &DomTree, ) -> Result<(), String> { + let mut fork_join_map = HashMap::new(); for idx in 0..function.nodes.len() { match function.nodes[idx] { - // Verify that joins are dominated by their corresponding - // forks. + // Verify that joins are dominated by their corresponding forks. At + // the same time, assemble a map from forks to their corresponding + // joins. Node::Join { control } => { // Check type of control predecessor. The last node ID // in the factor list is the corresponding fork node ID. 
@@ -206,57 +209,83 @@ fn verify_dominance_relationships( if !postdom.does_dom(join_id, fork_id) { Err(format!("Join node (ID {}) doesn't postdominate its corresponding fork node (ID {}).", join_id.idx(), fork_id.idx()))?; } + fork_join_map.insert(fork_id, join_id); } else { panic!("Join node's control predecessor has a non-control type."); } } _ => {} } + } - let dependencies = &control_output_dependencies[idx]; - for other_idx in 0..function.nodes.len() { - if dependencies.is_set(NodeID::new(other_idx)) { - match function.nodes[other_idx] { + // Loop over the nodes twice, since we need to completely assemble the + // fork_join_map in the first loop before using it in this second loop. + for idx in 0..function.nodes.len() { + // Having a control output dependency only matters if + // this node is a control node, or if this node is a + // control output of a control node. If this node is a + // control output, then we want to consider the control + // node itself. + let this_id = if let Node::Phi { + control: dominated_control, + data: _, + } + | Node::ThreadID { + control: dominated_control, + } + | Node::Collect { + control: dominated_control, + data: _, + } = function.nodes[idx] + { + dominated_control + } else { + NodeID::new(idx) + }; + + // control_output_dependencies contains the "out" values from the + // control output dataflow analysis, while we need the "in" values. + // This can be easily reconstructed. + let mut dependencies = UnionNodeSet::top(); + for input in get_uses(&function.nodes[idx]).as_ref() { + dependencies = + UnionNodeSet::meet(&dependencies, &control_output_dependencies[input.idx()]); + } + for pred_idx in 0..function.nodes.len() { + if dependencies.is_set(NodeID::new(pred_idx)) { + match function.nodes[pred_idx] { // Verify that uses of phis / collect nodes are dominated - // the corresponding region / join nodes, respectively. + // by the corresponding region / join nodes, respectively. 
Node::Phi { control, data: _ } | Node::Collect { control, data: _ } => { - // If the current node is a control node and the phi's - // region doesn't dominate it, then the phi doesn't - // dominate its use. - if dom.is_non_root(NodeID::new(idx)) - && !dom.does_dom(control, NodeID::new(idx)) - { + if dom.is_non_root(this_id) && !dom.does_dom(control, this_id) { Err(format!( "{} node (ID {}) doesn't dominate its use (ID {}).", - function.nodes[other_idx].upper_case_name(), - other_idx, + function.nodes[pred_idx].upper_case_name(), + pred_idx, idx ))?; } - - // If the current node is a phi or collect node whose - // corresponding region or join node isn't dominated by - // the other phi node, then the other phi doesn't - // dominate its use. We don't need to do something - // similar for thread ID nodes, since they have no data - // input. - if let Node::Phi { - control: dominated_control, - data: _, + } + // Verify that uses of thread ID nodes are dominated by the + // corresponding fork nodes. + Node::ThreadID { control } => { + if dom.is_non_root(this_id) && !dom.does_dom(control, this_id) { + Err(format!( + "ThreadID node (ID {}) doesn't dominate its use (ID {}).", + pred_idx, idx + ))?; } - | Node::Collect { - control: dominated_control, - data: _, - } = function.nodes[idx] + + // Every use of a thread ID must be postdominated by + // the thread ID's fork's corresponding join node. We + // don't need to check for the case where the thread ID + // flows through the collect node out of the fork-join, + // because after the collect, the thread ID is no longer + // considered an immediate control output use. 
+ if postdom.is_non_root(this_id) + && !postdom.does_dom(*fork_join_map.get(&control).unwrap(), this_id) { - if !dom.does_dom(control, dominated_control) { - Err(format!( - "{} node (ID {}) doesn't dominate its use (ID {}).", - function.nodes[other_idx].upper_case_name(), - other_idx, - idx - ))?; - } + Err(format!("ThreadID node's (ID {}) fork's join doesn't postdominate its use (ID {}).", pred_idx, idx))?; } } _ => {} diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index 226f91db..22608268 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -29,7 +29,7 @@ fn main() { let mut module = hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); let _types = hercules_ir::verify::verify(&mut module) - .expect("PANIC: Failed to typecheck Hercules IR module."); + .expect("PANIC: Failed to verify Hercules IR module."); if args.output.is_empty() { let mut tmp_path = temp_dir(); tmp_path.push("hercules_dot.dot"); -- GitLab From 2917d59afd9e19b8732925431d7f22fd077804cc Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Sun, 8 Oct 2023 21:59:44 -0500 Subject: [PATCH 104/105] Collect domtrees --- hercules_ir/src/verify.rs | 10 +++++++--- hercules_tools/src/hercules_dot/main.rs | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 37533f6a..262f749e 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -12,7 +12,7 @@ use crate::*; * useful results (typing, dominator trees, etc.), so if verification succeeds, * return those useful results. Otherwise, return the first error string found. 
*/ -pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { +pub fn verify(module: &mut Module) -> Result<(ModuleTyping, Vec<DomTree>, Vec<DomTree>), String> { let def_uses: Vec<_> = module .functions .iter() @@ -33,7 +33,9 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { verify_structure(function, def_use, typing, &module.types)?; } - // Check SSA, fork, and join dominance relations. + // Check SSA, fork, and join dominance relations. Collect domtrees. + let mut doms = vec![]; + let mut postdoms = vec![]; for ((function, typing), (def_use, reverse_postorder)) in zip( zip(module.functions.iter(), typing.iter()), zip(def_uses.iter(), reverse_postorders.iter()), @@ -53,9 +55,11 @@ pub fn verify(module: &mut Module) -> Result<ModuleTyping, String> { &dom, &postdom, )?; + doms.push(dom); + postdoms.push(postdom); } - Ok(typing) + Ok((typing, doms, postdoms)) } /* diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index 22608268..c943472f 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -28,7 +28,7 @@ fn main() { .expect("PANIC: Unable to read input file contents."); let mut module = hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); - let _types = hercules_ir::verify::verify(&mut module) + let (_types, _doms, _postdoms) = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to verify Hercules IR module."); if args.output.is_empty() { let mut tmp_path = temp_dir(); -- GitLab From f359260ac2452cea9d153c056e9573f1c7793a78 Mon Sep 17 00:00:00 2001 From: Russel Arbore <russel.jma@gmail.com> Date: Mon, 9 Oct 2023 13:23:08 -0500 Subject: [PATCH 105/105] fix --- hercules_ir/src/dataflow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 4bef9d00..92886d3b 100644 --- a/hercules_ir/src/dataflow.rs +++ 
b/hercules_ir/src/dataflow.rs @@ -177,7 +177,7 @@ impl Semilattice for IntersectNodeSet { } fn top() -> Self { - // For intersecting flow functions, the bottom state is full. + // For intersecting flow functions, the top state is full. IntersectNodeSet::Full } } @@ -226,7 +226,7 @@ impl Semilattice for UnionNodeSet { } fn top() -> Self { - // For unioning flow functions, the bottom state is empty. + // For unioning flow functions, the top state is empty. UnionNodeSet::Empty } } -- GitLab