diff --git a/Cargo.lock b/Cargo.lock index e525360763eadfc7dccd68bf541dc961667962c2..1e301dc0b414a284180c5ff73e6f0425537585f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,13 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hercules_codegen" +version = "0.1.0" +dependencies = [ + "hercules_ir", +] + [[package]] name = "hercules_ir" version = "0.1.0" @@ -152,12 +159,21 @@ dependencies = [ "ordered-float", ] +[[package]] +name = "hercules_opt" +version = "0.1.0" +dependencies = [ + "hercules_ir", +] + [[package]] name = "hercules_tools" version = "0.1.0" dependencies = [ "clap", + "hercules_codegen", "hercules_ir", + "hercules_opt", "rand", ] diff --git a/Cargo.toml b/Cargo.toml index a5227e311467bcf857ec0d14c6e36b2d337b95b1..fc931a3cd3057a8ae8f64a939d51ba14b465caee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] members = [ + "hercules_codegen", "hercules_ir", + "hercules_opt", "hercules_tools" ] diff --git a/hercules_codegen/Cargo.toml b/hercules_codegen/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..bd50b13f981284a2b4c13e4fc16f7d23a2b43e45 --- /dev/null +++ b/hercules_codegen/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "hercules_codegen" +version = "0.1.0" +authors = ["Russel Arbore <rarbore2@illinois.edu>"] + +[dependencies] +hercules_ir = { path = "../hercules_ir" } diff --git a/hercules_codegen/src/gcm.rs b/hercules_codegen/src/gcm.rs new file mode 100644 index 0000000000000000000000000000000000000000..132ce305e2ea1fe80a7e90e8197d4c22ac9b2500 --- /dev/null +++ b/hercules_codegen/src/gcm.rs @@ -0,0 +1,71 @@ +extern crate hercules_ir; + +use std::collections::HashMap; + +use self::hercules_ir::dataflow::*; +use self::hercules_ir::def_use::*; +use self::hercules_ir::dom::*; +use self::hercules_ir::ir::*; +use self::hercules_ir::loops::*; +use self::hercules_ir::subgraph::*; + +/* + * Top level global code motion function. Assigns each data node to one of its + * immediate control use / user nodes, forming (unordered) basic blocks. Returns + * the control node / basic block each node is in. + */ +pub fn gcm( + function: &Function, + def_use: &ImmutableDefUseMap, + reverse_postorder: &Vec<NodeID>, + control_subgraph: &Subgraph, + dom: &DomTree, + fork_join_map: &HashMap<NodeID, NodeID>, +) -> Vec<NodeID> { + // Step 1: find the immediate control uses and immediate control users of + // each node. + let immediate_control_uses = + forward_dataflow(function, reverse_postorder, |inputs, node_id| { + immediate_control_flow(inputs, node_id, function) + }); + let immediate_control_users = + backward_dataflow(function, def_use, reverse_postorder, |inputs, node_id| { + immediate_control_flow(inputs, node_id, function) + }); + + // Step 2: calculate loop tree of function. + let loops = loops(&control_subgraph, NodeID::new(0), &dom, fork_join_map); + + // Step 3: find most control dependent, shallowest loop level node for every + // node. + let bbs = (0..function.nodes.len()) + .map(|idx| { + let highest = + dom.lowest_amongst(immediate_control_uses[idx].nodes(function.nodes.len() as u32)); + let lowest = dom + .common_ancestor(immediate_control_users[idx].nodes(function.nodes.len() as u32)); + + // Collect into vector to reverse, since we want to traverse down + // the dom tree, not up it. + let mut chain = dom + .chain(lowest, highest) + .collect::<Vec<_>>() + .into_iter() + .rev(); + + let mut location = chain.next().unwrap(); + while let Some(control_node) = chain.next() { + // Traverse down the dom tree until we find a loop. + if loops.contains(control_node) { + break; + } else { + location = control_node; + } + } + + location + }) + .collect(); + + bbs +} diff --git a/hercules_codegen/src/lib.rs b/hercules_codegen/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..fd605651eed486718d591a1aa7d979f3e0c28ad5 --- /dev/null +++ b/hercules_codegen/src/lib.rs @@ -0,0 +1,3 @@ +pub mod gcm; + +pub use crate::gcm::*; diff --git a/hercules_ir/src/dataflow.rs b/hercules_ir/src/dataflow.rs index 0cb95e31f96b2648b9e3988466d0b6102641415e..c3f3e9f900116e79f10803f3225fa5c5565f0bbb 100644 --- a/hercules_ir/src/dataflow.rs +++ b/hercules_ir/src/dataflow.rs @@ -1,6 +1,7 @@ extern crate bitvec; -use dataflow::bitvec::prelude::*; +use self::bitvec::prelude::*; +use self::bitvec::slice::*; use crate::*; @@ -33,7 +34,7 @@ where L: Semilattice, F: FnMut(&[&L], NodeID) -> L, { - forward_dataflow_global(function, reverse_postorder, |global_outs, node_id| { + dataflow_global(function, reverse_postorder, |global_outs, node_id| { let uses = get_uses(&function.nodes[node_id.idx()]); let pred_outs: Vec<_> = uses .as_ref() @@ -45,16 +46,42 @@ where } /* - * The previous forward dataflow routine wraps around this dataflow routine, + * Top level backward dataflow function. Instead of passing the uses' lattice + * values to the flow function, passes in the users' lattice values. + */ +pub fn backward_dataflow<L, F>( + function: &Function, + def_use: &ImmutableDefUseMap, + reverse_postorder: &Vec<NodeID>, + mut flow_function: F, +) -> Vec<L> +where + L: Semilattice, + F: FnMut(&[&L], NodeID) -> L, +{ + let mut postorder = reverse_postorder.clone(); + postorder.reverse(); + dataflow_global(function, &postorder, |global_outs, node_id| { + let users = def_use.get_users(node_id); + let succ_outs: Vec<_> = users + .as_ref() + .iter() + .map(|id| &global_outs[id.idx()]) + .collect(); + flow_function(&succ_outs, node_id) + }) +} + +/* + * The previous forward dataflow routines wraps around this dataflow routine, * where the flow function doesn't just have access to this nodes input lattice * values, but also all the current lattice values for all the nodes. This is * useful for some dataflow analyses, such as reachability. The "global" in - * forward_dataflow_global refers to having a global view of the out lattice - * values. + * dataflow_global refers to having a global view of the out lattice values. */ -pub fn forward_dataflow_global<L, F>( +pub fn dataflow_global<L, F>( function: &Function, - reverse_postorder: &Vec<NodeID>, + order: &Vec<NodeID>, mut flow_function: F, ) -> Vec<L> where @@ -62,9 +89,7 @@ where F: FnMut(&[L], NodeID) -> L, { // Step 1: create initial set of "out" points. - let start_node_output = flow_function(&[], NodeID::new(0)); - let mut first_ins = vec![L::top(); function.nodes.len()]; - first_ins[0] = start_node_output; + let first_ins = vec![L::top(); function.nodes.len()]; let mut outs: Vec<L> = (0..function.nodes.len()) .map(|id| flow_function(&first_ins, NodeID::new(id))) .collect(); @@ -73,9 +98,9 @@ where loop { let mut change = false; - // Iterate nodes in reverse post order. - for node_id in reverse_postorder { - // Compute new "out" value from predecessor "out" values. + // Iterate nodes in specified order. + for node_id in order { + // Compute new "out" value from previous "out" values. let new_out = flow_function(&outs, *node_id); if outs[node_id.idx()] != new_out { change = true; @@ -157,6 +182,16 @@ impl IntersectNodeSet { IntersectNodeSet::Full => true, } } + + pub fn nodes(&self, num_nodes: u32) -> NodeSetIterator { + match self { + IntersectNodeSet::Empty => NodeSetIterator::Empty, + IntersectNodeSet::Bits(bitvec) => { + NodeSetIterator::Bits(bitvec.iter_ones().map(NodeID::new)) + } + IntersectNodeSet::Full => NodeSetIterator::Full(0, num_nodes), + } + } } impl Semilattice for IntersectNodeSet { @@ -205,6 +240,16 @@ impl UnionNodeSet { UnionNodeSet::Full => true, } } + + pub fn nodes(&self, num_nodes: u32) -> NodeSetIterator { + match self { + UnionNodeSet::Empty => NodeSetIterator::Empty, + UnionNodeSet::Bits(bitvec) => { + NodeSetIterator::Bits(bitvec.iter_ones().map(NodeID::new)) + } + UnionNodeSet::Full => NodeSetIterator::Full(0, num_nodes), + } + } } impl Semilattice for UnionNodeSet { @@ -234,6 +279,33 @@ impl Semilattice for UnionNodeSet { } } +#[derive(Clone, Debug)] +pub enum NodeSetIterator<'a> { + Empty, + Bits(std::iter::Map<IterOnes<'a, u8, LocalBits>, fn(usize) -> ir::NodeID>), + Full(u32, u32), +} + +impl<'a> Iterator for NodeSetIterator<'a> { + type Item = NodeID; + + fn next(&mut self) -> Option<Self::Item> { + match self { + NodeSetIterator::Empty => None, + NodeSetIterator::Bits(iter) => iter.next(), + NodeSetIterator::Full(idx, cap) => { + if idx < cap { + let id = NodeID::new(*idx as usize); + *idx += 1; + Some(id) + } else { + None + } + } + } + } +} + /* * Below are some common flow functions. They all take a slice of semilattice * references as their first argument, and a node ID as their second. However, @@ -273,3 +345,47 @@ pub fn control_output_flow( out } + +/* + * Flow function for collecting all of a data node's immediate uses / users of + * control nodes. Useful for code generation. Since this is for immediate uses / + * users of control nodes, control node uses / users do not propagate through + * control nodes, or through control output nodes (phis, thread IDs, collects). + */ +pub fn immediate_control_flow( + inputs: &[&UnionNodeSet], + mut node_id: NodeID, + function: &Function, +) -> UnionNodeSet { + let mut out = UnionNodeSet::top(); + + // Step 1: replace node if this is a phi, thread ID, or collect. + if let Node::Phi { control, data: _ } + | Node::ThreadID { control } + | Node::Collect { control, data: _ } = &function.nodes[node_id.idx()] + { + node_id = *control; + } else { + // Union node inputs if not a special case. + out = inputs + .into_iter() + .fold(UnionNodeSet::top(), |a, b| UnionNodeSet::meet(&a, b)); + } + let node = &function.nodes[node_id.idx()]; + + // Step 2: figure out if this node is a control node. + let control = if let Node::ReadProd { prod, index: _ } = node { + function.nodes[prod.idx()].is_strictly_control() + } else { + node.is_strictly_control() + }; + + // Step 3: clear all bits and set bit for current node, if applicable. + if control { + let mut singular = bitvec![u8, Lsb0; 0; function.nodes.len()]; + singular.set(node_id.idx(), true); + out = UnionNodeSet::Bits(singular); + } + + out +} diff --git a/hercules_ir/src/dom.rs b/hercules_ir/src/dom.rs index fc5fc3983ca0683cdad46b336ef5479041dd91f2..b732db550c994bd673dfaa42335c33824dc965ba 100644 --- a/hercules_ir/src/dom.rs +++ b/hercules_ir/src/dom.rs @@ -1,21 +1,28 @@ -extern crate bitvec; +use std::collections::HashMap; use crate::*; -use std::collections::HashMap; - /* * Custom type for storing a dominator tree. For each control node, store its - * immediate dominator. + * immediate dominator, and its level in the dominator tree. Dominator tree + * levels are used for finding common ancestors. */ #[derive(Debug, Clone)] pub struct DomTree { - idom: HashMap<NodeID, NodeID>, + root: NodeID, + idom: HashMap<NodeID, (u32, NodeID)>, +} + +#[derive(Debug, Clone)] +pub struct DomChainIterator<'a> { + dom: &'a DomTree, + iter: Option<NodeID>, + top: NodeID, } impl DomTree { pub fn imm_dom(&self, x: NodeID) -> Option<NodeID> { - self.idom.get(&x).map(|x| x.clone()) + self.idom.get(&x).map(|x| x.1) } pub fn does_imm_dom(&self, a: NodeID, b: NodeID) -> bool { @@ -47,19 +54,90 @@ impl DomTree { self.idom.contains_key(&x) } + pub fn contains(&self, x: NodeID) -> bool { + x == self.root || self.idom.contains_key(&x) + } + /* - * Typically, node ID 0 is the root of the dom tree. Under this assumption, - * this function checks if a node is in the dom tree. + * Find the node with the lowest level in the dom tree amongst the nodes + * given. Although not technically necessary, you're probably using this + * function wrong if the nodes in the iterator do not form a dominance + * chain. */ - pub fn contains_conventional(&self, x: NodeID) -> bool { - x == NodeID::new(0) || self.idom.contains_key(&x) + pub fn lowest_amongst<I>(&self, x: I) -> NodeID + where + I: Iterator<Item = NodeID>, + { + x.map(|x| { + if x == self.root { + (0, x) + } else { + (self.idom[&x].0, x) + } + }) + .max_by(|x, y| x.0.cmp(&y.0)) + .unwrap() + .1 + } + + pub fn common_ancestor<I>(&self, x: I) -> NodeID + where + I: Iterator<Item = NodeID>, + { + let mut positions: HashMap<NodeID, u32> = x + .map(|x| (x, if x == self.root { 0 } else { self.idom[&x].0 })) + .collect(); + let mut current_level = *positions.iter().map(|(_, level)| level).max().unwrap(); + while positions.len() > 1 { + let at_current_level: Vec<NodeID> = positions + .iter() + .filter(|(_, level)| **level == current_level) + .map(|(node, _)| *node) + .collect(); + for node in at_current_level.into_iter() { + positions.remove(&node); + let (level, parent) = self.idom[&node]; + assert!(level == current_level); + positions.insert(parent, level - 1); + } + current_level -= 1; + } + positions.into_iter().next().unwrap().0 } - pub fn get_underlying_map(&self) -> &HashMap<NodeID, NodeID> { + pub fn chain<'a>(&'a self, bottom: NodeID, top: NodeID) -> DomChainIterator<'a> { + DomChainIterator { + dom: self, + iter: Some(bottom), + top, + } + } + + pub fn get_underlying_map(&self) -> &HashMap<NodeID, (u32, NodeID)> { &self.idom } } +impl<'a> Iterator for DomChainIterator<'a> { + type Item = NodeID; + + fn next(&mut self) -> Option<Self::Item> { + if let Some(iter) = self.iter { + let ret = iter; + if ret == self.top { + self.iter = None; + } else if let Some(iter) = self.dom.imm_dom(iter) { + self.iter = Some(iter); + } else { + panic!("In DomChainIterator, top node doesn't dominate bottom node.") + } + Some(ret) + } else { + None + } + } +} + /* * Top level function for calculating dominator trees. Uses the semi-NCA * algorithm, as described in "Finding Dominators in Practice". @@ -75,7 +153,7 @@ pub fn dominator(subgraph: &Subgraph, root: NodeID) -> DomTree { let mut idom = HashMap::new(); for w in preorder[1..].iter() { // Each idom starts as the parent node. - idom.insert(*w, parents[w]); + idom.insert(*w, (0, parents[w])); } // Step 2: define snca_compress, which will be used to compute semi- @@ -116,12 +194,28 @@ pub fn dominator(subgraph: &Subgraph, root: NodeID) -> DomTree { // Step 4: compute idom. for v_n in 1..preorder.len() { let v = preorder[v_n]; - while node_numbers[&idom[&v]] > semi[v_n] { - *idom.get_mut(&v).unwrap() = idom[&idom[&v]]; + while node_numbers[&idom[&v].1] > semi[v_n] { + *idom.get_mut(&v).unwrap() = idom[&idom[&v].1]; + } + } + + // Step 5: compute levels in idom. + let mut change = true; + while change { + change = false; + for node in preorder[1..].iter() { + let (level, parent) = idom[node]; + if level == 0 && parent == root { + idom.get_mut(node).unwrap().0 = 1; + change = true; + } else if level == 0 && idom[&parent].0 != 0 { + idom.get_mut(node).unwrap().0 = 1 + idom[&parent].0; + change = true; + } } } - DomTree { idom } + DomTree { root, idom } } fn preorder(subgraph: &Subgraph, root: NodeID) -> (Vec<NodeID>, HashMap<NodeID, NodeID>) { diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 943d04938a30add8a201f32a2245518b3ab33717..6d4364cd5518321e0d3e41d91bccad2e840166ab 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -1,24 +1,20 @@ pub mod build; -pub mod ccp; pub mod dataflow; -pub mod dce; pub mod def_use; pub mod dom; -pub mod gvn; pub mod ir; +pub mod loops; pub mod parse; pub mod subgraph; pub mod typecheck; pub mod verify; pub use crate::build::*; -pub use crate::ccp::*; pub use crate::dataflow::*; -pub use crate::dce::*; pub use crate::def_use::*; pub use crate::dom::*; -pub use crate::gvn::*; pub use crate::ir::*; +pub use crate::loops::*; pub use crate::parse::*; pub use crate::subgraph::*; pub use crate::typecheck::*; diff --git a/hercules_ir/src/loops.rs b/hercules_ir/src/loops.rs new file mode 100644 index 0000000000000000000000000000000000000000..cc302998b92062dcf3ff9c794566be5d409b4629 --- /dev/null +++ b/hercules_ir/src/loops.rs @@ -0,0 +1,133 @@ +extern crate bitvec; + +use std::collections::HashMap; + +use self::bitvec::prelude::*; + +use crate::*; + +/* + * Custom type for storing a loop tree. Each node corresponds to a single loop + * or a fork join pair in the IR graph. Each node in the tree corresponds to + * some subset of the overall IR graph. The root node corresponds to the entire + * IR graph. The children of the root correspond to the top-level loops and fork + * join pairs, and so on. Each node in the loop tree has a representative + * "header" node. For normal loops, this is the region node branched to by a + * dominated if node. For fork join pairs, this is the fork node. A loop is a + * top-level loop if its parent is the root node of the subgraph. + */ +#[derive(Debug, Clone)] +pub struct LoopTree { + root: NodeID, + loops: HashMap<NodeID, (BitVec<u8, Lsb0>, NodeID)>, +} + +impl LoopTree { + pub fn contains(&self, x: NodeID) -> bool { + x == self.root || self.loops.contains_key(&x) + } +} + +/* + * Top level function for calculating loop trees. + */ +pub fn loops( + subgraph: &Subgraph, + root: NodeID, + dom: &DomTree, + fork_join_map: &HashMap<NodeID, NodeID>, +) -> LoopTree { + // Step 1: collect loop back edges. + let mut loop_back_edges = vec![]; + for node in subgraph.iter() { + // Check successors. Any successor dominating its predecessor is the + // destination of a loop back edge. + for succ in subgraph.succs(*node) { + if dom.does_dom(succ, *node) { + loop_back_edges.push((*node, succ)); + } + } + } + + // Step 2: collect "edges" from joins to forks. Technically, this doesn't + // correspond to a real edge in the graph. However, our loop tree includes + // fork join pairs as loops, so create a phantom loop back edge. + for (fork, join) in fork_join_map { + loop_back_edges.push((*join, *fork)); + } + + // Step 3: find control nodes inside each loop. For a particular natural + // loop with header d and a back edge from node n to d, the nodes in the + // loop are d itself, and all nodes with a path to n not going through d. + let loop_contents = loop_back_edges.iter().map(|(n, d)| { + // Compute reachability for each loop back edge. + let mut loop_contents = loop_reachability(*n, *d, subgraph); + loop_contents.set(d.idx(), true); + (d, loop_contents) + }); + + // Step 4: merge loops with same header into a single natural loop. + let mut loops: HashMap<NodeID, BitVec<u8, Lsb0>> = HashMap::new(); + for (header, contents) in loop_contents { + if loops.contains_key(header) { + let old_contents = loops.remove(header).unwrap(); + loops.insert(*header, old_contents | contents); + } else { + loops.insert(*header, contents); + } + } + + // Step 5: figure out loop tree edges. A loop with header a can only be an + // outer loop of a loop with header b if a dominates b. + let loops = loops + .iter() + .map(|(header, contents)| { + let mut dominator = *header; + while let Some(new_dominator) = dom.imm_dom(dominator) { + dominator = new_dominator; + if let Some(outer_contents) = loops.get(&dominator) { + if outer_contents[header.idx()] { + return (*header, (contents.clone(), dominator)); + } + } + } + (*header, (contents.clone(), root)) + }) + .collect(); + + LoopTree { root, loops } +} + +fn loop_reachability(n: NodeID, d: NodeID, subgraph: &Subgraph) -> BitVec<u8, Lsb0> { + let visited = bitvec![u8, Lsb0; 0; subgraph.original_num_nodes() as usize]; + + // n is the root of the traversal, finding d is a termination condition. + let visited = loop_reachability_helper(n, d, subgraph, visited); + + visited +} + +fn loop_reachability_helper( + n: NodeID, + d: NodeID, + subgraph: &Subgraph, + mut visited: BitVec<u8, Lsb0>, +) -> BitVec<u8, Lsb0> { + if visited[n.idx()] { + // If already visited, return early. + visited + } else { + // Set visited to true. + visited.set(n.idx(), true); + + // Iterate over predecessors. + for pred in subgraph.preds(n) { + // Don't traverse d. + if pred != d { + visited = loop_reachability_helper(pred, d, subgraph, visited); + } + } + + visited + } +} diff --git a/hercules_ir/src/subgraph.rs b/hercules_ir/src/subgraph.rs index dd7cb2a60f0511c985fe876c9a88f2cb16f1f695..290abd7475c4af18e39f1f42f634c9d63f13f091 100644 --- a/hercules_ir/src/subgraph.rs +++ b/hercules_ir/src/subgraph.rs @@ -15,6 +15,7 @@ pub struct Subgraph { forward_edges: Vec<u32>, first_backward_edges: Vec<u32>, backward_edges: Vec<u32>, + original_num_nodes: u32, } pub struct SubgraphIterator<'a> { @@ -36,15 +37,32 @@ impl<'a> Iterator for SubgraphIterator<'a> { } } +impl IntoIterator for Subgraph { + type Item = NodeID; + type IntoIter = std::vec::IntoIter<Self::Item>; + + fn into_iter(self) -> Self::IntoIter { + self.nodes.into_iter() + } +} + impl Subgraph { pub fn num_nodes(&self) -> u32 { self.nodes.len() as u32 } + pub fn original_num_nodes(&self) -> u32 { + self.original_num_nodes + } + pub fn contains_node(&self, id: NodeID) -> bool { self.node_numbers.contains_key(&id) } + pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, NodeID> { + self.nodes.iter() + } + pub fn preds(&self, id: NodeID) -> SubgraphIterator { let number = self.node_numbers[&id]; if ((number + 1) as usize) < self.first_backward_edges.len() { @@ -86,6 +104,7 @@ impl Subgraph { forward_edges, mut first_backward_edges, mut backward_edges, + original_num_nodes, } = self; // Since we need to add a "new" root to the subgraph, we first need to @@ -138,6 +157,7 @@ impl Subgraph { forward_edges: backward_edges, first_backward_edges: new_first_forward_edges, backward_edges: new_forward_edges, + original_num_nodes, } } } @@ -159,6 +179,7 @@ where forward_edges: vec![], first_backward_edges: vec![], backward_edges: vec![], + original_num_nodes: function.nodes.len() as u32, }; // Step 1: collect predicated nodes. diff --git a/hercules_ir/src/verify.rs b/hercules_ir/src/verify.rs index 4c9db60bb7a690652ae08d3cca4365cb7a207470..00513d52db117232cd0aa12220fd8c61766dab27 100644 --- a/hercules_ir/src/verify.rs +++ b/hercules_ir/src/verify.rs @@ -3,7 +3,7 @@ extern crate bitvec; use std::collections::HashMap; use std::iter::zip; -use verify::bitvec::prelude::*; +use self::bitvec::prelude::*; use crate::*; @@ -19,6 +19,7 @@ pub fn verify( Vec<ImmutableDefUseMap>, Vec<Vec<NodeID>>, ModuleTyping, + Vec<Subgraph>, Vec<DomTree>, Vec<DomTree>, Vec<HashMap<NodeID, NodeID>>, @@ -79,10 +80,17 @@ pub fn verify( )?; } + // Recalculate subgraphs for return since postdominator analysis modifies + // them. + let subgraphs: Vec<_> = zip(module.functions.iter(), def_uses.iter()) + .map(|(function, def_use)| control_subgraph(function, def_use)) + .collect(); + Ok(( def_uses, reverse_postorders, typing, + subgraphs, doms, postdoms, fork_join_maps, @@ -392,7 +400,7 @@ fn verify_dominance_relationships( // If the node to be added to the to_check vector isn't even in the // dominator tree, don't bother. It doesn't need to be checked for // dominance relations. - if !dom.contains_conventional(this_id) { + if !dom.contains(this_id) { continue; } @@ -419,7 +427,7 @@ fn verify_dominance_relationships( // Verify that uses of phis / collect nodes are dominated // by the corresponding region / join nodes, respectively. Node::Phi { control, data: _ } | Node::Collect { control, data: _ } => { - if dom.contains_conventional(this_id) && !dom.does_dom(control, this_id) { + if dom.contains(this_id) && !dom.does_dom(control, this_id) { Err(format!( "{} node (ID {}) doesn't dominate its use (ID {}).", function.nodes[pred_idx].upper_case_name(), @@ -431,7 +439,7 @@ fn verify_dominance_relationships( // Verify that uses of thread ID nodes are dominated by the // corresponding fork nodes. Node::ThreadID { control } => { - if dom.contains_conventional(this_id) && !dom.does_dom(control, this_id) { + if dom.contains(this_id) && !dom.does_dom(control, this_id) { Err(format!( "ThreadID node (ID {}) doesn't dominate its use (ID {}).", pred_idx, @@ -445,7 +453,7 @@ fn verify_dominance_relationships( // flows through the collect node out of the fork-join, // because after the collect, the thread ID is no longer // considered an immediate control output use. - if postdom.contains_conventional(this_id) + if postdom.contains(this_id) && !postdom.does_dom(*fork_join_map.get(&control).unwrap(), this_id) { Err(format!("ThreadID node's (ID {}) fork's join doesn't postdominate its use (ID {}).", pred_idx, this_id.idx()))?; diff --git a/hercules_opt/Cargo.toml b/hercules_opt/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..47bd9bd5dd8c40935c9cf708661d4c8bfe2e3b83 --- /dev/null +++ b/hercules_opt/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "hercules_opt" +version = "0.1.0" +authors = ["Russel Arbore <rarbore2@illinois.edu>"] + +[dependencies] +hercules_ir = { path = "../hercules_ir" } diff --git a/hercules_ir/src/ccp.rs b/hercules_opt/src/ccp.rs similarity index 93% rename from hercules_ir/src/ccp.rs rename to hercules_opt/src/ccp.rs index ded30b1ff0f10f63f9f80b649fc63807c318367c..fc5bc2166fce93c3213f3d1ef7265971fad68b51 100644 --- a/hercules_ir/src/ccp.rs +++ b/hercules_opt/src/ccp.rs @@ -1,7 +1,11 @@ +extern crate hercules_ir; + use std::collections::HashMap; use std::iter::zip; -use crate::*; +use self::hercules_ir::dataflow::*; +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; /* * The ccp lattice tracks, for each node, the following information: @@ -134,7 +138,7 @@ pub fn ccp( reverse_postorder: &Vec<NodeID>, ) { // Step 1: run ccp analysis to understand the function. - let result = forward_dataflow_global(&function, reverse_postorder, |inputs, node_id| { + let result = dataflow_global(&function, reverse_postorder, |inputs, node_id| { ccp_flow_function(inputs, node_id, &function, &constants) }); @@ -268,6 +272,10 @@ pub fn ccp( // remove this branch node. if let None = reachable_users.next() { // The user is a ReadProd node, which in turn has one user. + assert!( + def_use.get_users(*the_reachable_user).len() == 1, + "Control ReadProd node doesn't have exactly one user." + ); let target = def_use.get_users(*the_reachable_user)[0]; // For each use in the target of the reachable ReadProd, turn it @@ -287,6 +295,76 @@ pub fn ccp( } } } + + // Step 4: collapse region chains. + collapse_region_chains(function, def_use); +} + +/* + * Top level function to collapse region chains. A chain is a list of at least + * one region node that takes only one control input. Region chains can be + * deleted. The use of the head of the chain can turn into the use by the user + * of the tail of the chain. + */ +pub fn collapse_region_chains(function: &mut Function, def_use: &ImmutableDefUseMap) { + // Loop over all region nodes. It's fine to modify the function as we loop + // over it. + for id in (0..function.nodes.len()).map(NodeID::new) { + if let Node::Region { preds } = &function.nodes[id.idx()] { + if preds.len() == 1 { + // Step 1: bridge gap between use and user. + let predecessor = preds[0]; + let successor = def_use + .get_users(id) + .iter() + .filter(|x| !function.nodes[x.idx()].is_phi()) + .next() + .expect("Region node doesn't have a non-phi user."); + + // Set successor's use of this region to use the region's use. + for u in get_uses_mut(&mut function.nodes[successor.idx()]).as_mut() { + if **u == id { + **u = predecessor; + } + } + + // Delete this region. + function.nodes[id.idx()] = Node::Start; + + // Step 2: bridge gap between uses and users of corresponding + // phi nodes. + let phis: Vec<NodeID> = def_use + .get_users(id) + .iter() + .map(|x| *x) + .filter(|x| function.nodes[x.idx()].is_phi()) + .collect(); + for phi_id in phis { + let data_uses = + if let Node::Phi { control, data } = &function.nodes[phi_id.idx()] { + assert!(*control == id); + data + } else { + panic!() + }; + assert!(data_uses.len() == 1, "Phi node doesn't have exactly one data use, while corresponding region had exactly one control use."); + let predecessor = data_uses[0]; + + // Set successors' use of this phi to use the phi's use. + for successor in def_use.get_users(phi_id) { + for u in get_uses_mut(&mut function.nodes[successor.idx()]).as_mut() { + if **u == phi_id { + **u = predecessor; + } + } + } + + // Delete this phi. + function.nodes[phi_id.idx()] = Node::Start; + } + } + } + } } fn ccp_flow_function( diff --git a/hercules_ir/src/dce.rs b/hercules_opt/src/dce.rs similarity index 94% rename from hercules_ir/src/dce.rs rename to hercules_opt/src/dce.rs index 1f56d8648a11f707019353ba9493d32e0005113b..255402902aa6ec0bcc240cbb7bd18a9eb8945526 100644 --- a/hercules_ir/src/dce.rs +++ b/hercules_opt/src/dce.rs @@ -1,4 +1,7 @@ -use crate::*; +extern crate hercules_ir; + +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; /* * Top level function to run dead code elimination. Deletes nodes by setting diff --git a/hercules_ir/src/gvn.rs b/hercules_opt/src/gvn.rs similarity index 97% rename from hercules_ir/src/gvn.rs rename to hercules_opt/src/gvn.rs index c8f77244d0b12d90c4a395c69c318287d379cad9..e8337e609b3ae881c3e8a012d9cd09e212eee2c1 100644 --- a/hercules_ir/src/gvn.rs +++ b/hercules_opt/src/gvn.rs @@ -1,6 +1,9 @@ +extern crate hercules_ir; + use std::collections::HashMap; -use crate::*; +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; /* * Top level function to run global value numbering. In the sea of nodes, GVN is diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..309c4e2393057ef5460d2bf3eaf4465f016ffdaf --- /dev/null +++ b/hercules_opt/src/lib.rs @@ -0,0 +1,7 @@ +pub mod ccp; +pub mod dce; +pub mod gvn; + +pub use crate::ccp::*; +pub use crate::dce::*; +pub use crate::gvn::*; diff --git a/hercules_tools/Cargo.toml b/hercules_tools/Cargo.toml index 458de0e0e86e18f89a3bbb41b5a19321413f5a35..412f1022638722bd15023dbb289642634d654733 100644 --- a/hercules_tools/Cargo.toml +++ b/hercules_tools/Cargo.toml @@ -7,7 +7,13 @@ authors = ["Russel Arbore <rarbore2@illinois.edu>"] name = "hercules_dot" path = "src/hercules_dot/main.rs" +[[bin]] +name = "hercules_cpu" +path = "src/hercules_cpu/main.rs" + [dependencies] clap = { version = "*", features = ["derive"] } hercules_ir = { path = "../hercules_ir" } +hercules_opt = { path = "../hercules_opt" } +hercules_codegen = { path = "../hercules_codegen" } rand = "*" diff --git a/hercules_tools/src/hercules_cpu/main.rs b/hercules_tools/src/hercules_cpu/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..bb0a49963f2b0422aa97489addf1bcbefa241c16 --- /dev/null +++ b/hercules_tools/src/hercules_cpu/main.rs @@ -0,0 +1,77 @@ +extern crate clap; + +use std::fs::File; +use std::io::prelude::*; + +use clap::Parser; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + hir_file: String, + + #[arg(short, long, default_value_t = String::new())] + output: String, +} + +fn main() { + let args = Args::parse(); + if !args.hir_file.ends_with(".hir") { + eprintln!("WARNING: Running hercules_cpu on a file without a .hir extension - interpreting as a textual Hercules IR file."); + } + + let mut file = File::open(args.hir_file).expect("PANIC: Unable to open input file."); + let mut contents = String::new(); + file.read_to_string(&mut contents) + .expect("PANIC: Unable to read input file contents."); + let mut module = + hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); + let (def_uses, reverse_postorders, _typing, _subgraphs, _doms, _postdoms, _fork_join_maps) = + hercules_ir::verify::verify(&mut module) + .expect("PANIC: Failed to verify Hercules IR module."); + + let mut module = module.map( + |(mut function, id), (types, mut constants, dynamic_constants)| { + hercules_opt::ccp::ccp( + &mut function, + &mut constants, + &def_uses[id.idx()], + &reverse_postorders[id.idx()], + ); + hercules_opt::dce::dce(&mut function); + function.delete_gravestones(); + + let def_use = hercules_ir::def_use::def_use(&function); + hercules_opt::gvn::gvn(&mut function, &constants, &def_use); + hercules_opt::dce::dce(&mut function); + function.delete_gravestones(); + + (function, (types, constants, dynamic_constants)) + }, + ); + let (def_uses, reverse_postorders, _typing, subgraphs, doms, _postdoms, fork_join_maps) = + hercules_ir::verify::verify(&mut module) + .expect("PANIC: Failed to verify Hercules IR module."); + + let bbs: Vec<_> = module + .functions + .iter() + .enumerate() + .map(|(idx, function)| { + hercules_codegen::gcm::gcm( + function, + &def_uses[idx], + &reverse_postorders[idx], + &subgraphs[idx], + &doms[idx], + &fork_join_maps[idx], + ) + .iter() + .map(|id| id.idx()) + .enumerate() + .collect::<Vec<_>>() + }) + .collect(); + + println!("{:?}", bbs); +} diff --git a/hercules_tools/src/hercules_dot/dot.rs b/hercules_tools/src/hercules_dot/dot.rs index 6f41f85b9336fb0f071be20b3274b53723bc1544..f71a40f18fbae210d1111e19e9094a9f5f12dd97 100644 --- a/hercules_tools/src/hercules_dot/dot.rs +++ b/hercules_tools/src/hercules_dot/dot.rs @@ -67,7 +67,7 @@ pub fn write_dot<W: Write>( // Step 2: draw dominance edges in dark green. Don't draw post dominance // edges because then xdot lays out the graph strangely. let dom = &doms[function_id.idx()]; - for (child_id, parent_id) in dom.get_underlying_map() { + for (child_id, (_, parent_id)) in dom.get_underlying_map() { write_edge( *child_id, function_id, diff --git a/hercules_tools/src/hercules_dot/main.rs b/hercules_tools/src/hercules_dot/main.rs index bb3efe5794d48bec125397814114cf452d50560b..e3543fe5b6e860a4456ea20fe60a2529ea2031d3 100644 --- a/hercules_tools/src/hercules_dot/main.rs +++ b/hercules_tools/src/hercules_dot/main.rs @@ -34,30 +34,30 @@ fn main() { .expect("PANIC: Unable to read input file contents."); let mut module = hercules_ir::parse::parse(&contents).expect("PANIC: Failed to parse Hercules IR file."); - let (def_uses, reverse_postorders, _typing, _doms, _postdoms, _fork_join_maps) = + let (def_uses, reverse_postorders, _typing, _subgraphs, _doms, _postdoms, _fork_join_maps) = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to verify Hercules IR module."); let mut module = module.map( |(mut function, id), (types, mut constants, dynamic_constants)| { - hercules_ir::ccp::ccp( + hercules_opt::ccp::ccp( &mut function, &mut constants, &def_uses[id.idx()], &reverse_postorders[id.idx()], ); - hercules_ir::dce::dce(&mut function); + hercules_opt::dce::dce(&mut function); function.delete_gravestones(); let def_use = hercules_ir::def_use::def_use(&function); - hercules_ir::gvn::gvn(&mut function, &constants, &def_use); - hercules_ir::dce::dce(&mut function); + hercules_opt::gvn::gvn(&mut function, &constants, &def_use); + hercules_opt::dce::dce(&mut function); function.delete_gravestones(); (function, (types, constants, dynamic_constants)) }, ); - let (_def_use, _reverse_postorders, typing, doms, _postdoms, fork_join_maps) = + let (_def_uses, _reverse_postorders, typing, _subgraphs, doms, _postdoms, fork_join_maps) = hercules_ir::verify::verify(&mut module) .expect("PANIC: Failed to verify Hercules IR module.");