diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index 8b5df931807326c5262d1a29d4bd35e5357ee176..eaa7374b7a8f6c3df66a9a5085d179a143b81d34 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -1,4 +1,5 @@ extern crate bitvec; +extern crate hercules_ir; use std::cell::{Cell, RefCell}; use std::collections::{HashMap, HashSet, VecDeque}; @@ -7,6 +8,8 @@ use std::iter::once; use self::bitvec::prelude::*; +use self::hercules_ir::*; + use crate::*; /* diff --git a/hercules_cg/src/sched_dot.rs b/hercules_cg/src/sched_dot.rs index b997138d2f5c7506d2bd50322ac3e852631e7f58..f044618931f0ffe45aeb6da1ef2798f4bc7bdfe6 100644 --- a/hercules_cg/src/sched_dot.rs +++ b/hercules_cg/src/sched_dot.rs @@ -1,4 +1,5 @@ extern crate bitvec; +extern crate hercules_ir; extern crate rand; use std::collections::{HashMap, VecDeque}; @@ -9,9 +10,10 @@ use std::io::Write as _; use std::process::Command; use self::bitvec::prelude::*; - use self::rand::Rng; +use self::hercules_ir::*; + use crate::*; /* diff --git a/hercules_cg/src/sched_schedule.rs b/hercules_cg/src/sched_schedule.rs index 5300b990efeb2d345de1085bda746595570ecb97..16720bbc60380866cd2dbd868ebf9f6cbd5a91fe 100644 --- a/hercules_cg/src/sched_schedule.rs +++ b/hercules_cg/src/sched_schedule.rs @@ -1,6 +1,10 @@ +extern crate hercules_ir; + use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::{empty, once, zip}; +use self::hercules_ir::*; + use crate::*; /* diff --git a/hercules_ir/src/callgraph.rs b/hercules_ir/src/callgraph.rs new file mode 100644 index 0000000000000000000000000000000000000000..84be922dea8a89732bf2f2ad0d9fe3f3865d5d90 --- /dev/null +++ b/hercules_ir/src/callgraph.rs @@ -0,0 +1,106 @@ +use crate::*; + +/* + * Custom type for an immutable call graph. 
+ */ +#[derive(Debug, Clone)] +pub struct CallGraph { + first_callees: Vec<u32>, + callees: Vec<FunctionID>, + first_callers: Vec<u32>, + callers: Vec<FunctionID>, +} + +impl CallGraph { + pub fn num_callees(&self, id: FunctionID) -> usize { + if id.idx() + 1 < self.first_callees.len() { + self.first_callees[id.idx() + 1] as usize - self.first_callees[id.idx()] as usize + } else { + self.callees.len() - self.first_callees[id.idx()] as usize + } + } + + pub fn get_callees(&self, id: FunctionID) -> &[FunctionID] { + let first_callee = self.first_callees[id.idx()] as usize; + let num_callees = self.num_callees(id) as usize; + &self.callees[first_callee..first_callee + num_callees] + } + + pub fn num_callers(&self, id: FunctionID) -> usize { + if id.idx() + 1 < self.first_callers.len() { + self.first_callers[id.idx() + 1] as usize - self.first_callers[id.idx()] as usize + } else { + self.callers.len() - self.first_callers[id.idx()] as usize + } + } + + pub fn get_callers(&self, id: FunctionID) -> &[FunctionID] { + let first_caller = self.first_callers[id.idx()] as usize; + let num_callers = self.num_callers(id) as usize; + &self.callers[first_caller..first_caller + num_callers] + } + + pub fn num_functions(&self) -> usize { + self.first_callees.len() + } +} + +/* + * Top level function to calculate the call graph of a Hercules module. + */ +pub fn callgraph(module: &Module) -> CallGraph { + // Step 1: collect the functions called in each function. + let callee_functions: Vec<Vec<FunctionID>> = module + .functions + .iter() + .map(|func| { + let mut called: Vec<_> = func + .nodes + .iter() + .filter_map(|node| { + if let Node::Call { + control: _, + function, + dynamic_constants: _, + args: _, + } = node + { + Some(*function) + } else { + None + } + }) + .collect(); + called.sort_unstable(); + called.dedup(); + called + }) + .collect(); + + // Step 2: collect the functions calling each function. 
+ let mut caller_functions = vec![vec![]; callee_functions.len()]; + for (caller_idx, callees) in callee_functions.iter().enumerate() { + let caller_id = FunctionID::new(caller_idx); + for callee in callees { + caller_functions[callee.idx()].push(caller_id); + } + } + + // Step 3: pack callee/caller info into CallGraph structure. + let mut callgraph = CallGraph { + first_callees: vec![], + callees: vec![], + first_callers: vec![], + callers: vec![], + }; + for callees in callee_functions { + callgraph.first_callees.push(callgraph.callees.len() as u32); + callgraph.callees.extend(callees); + } + for callers in caller_functions { + callgraph.first_callers.push(callgraph.callers.len() as u32); + callgraph.callers.extend(callers); + } + + callgraph +} diff --git a/hercules_ir/src/def_use.rs b/hercules_ir/src/def_use.rs index 3cca0e757d04918ec7c7c67eee2590c8bd1c888f..d7c7b3a166a4f31bbb58331861a9ee9984cad536 100644 --- a/hercules_ir/src/def_use.rs +++ b/hercules_ir/src/def_use.rs @@ -64,7 +64,6 @@ pub fn def_use(function: &Function) -> ImmutableDefUseMap { } } - // Step 5: pack and return. ImmutableDefUseMap { first_edges, users } } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index 9991004f7b093704578da0ac373f2e8c0559f968..5cf549a808056d79fb8bf4713d5308fb772aa294 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -1468,6 +1468,11 @@ impl Intrinsic { * Rust things to make newtyped IDs usable. */ +pub trait ID: Clone + Eq + Ord + std::hash::Hash + Copy { + fn new(x: usize) -> Self; + fn idx(&self) -> usize; +} + #[macro_export] macro_rules! define_id_type { ($x: ident) => { @@ -1486,12 +1491,12 @@ macro_rules! 
define_id_type { )] pub struct $x(u32); - impl $x { - pub fn new(x: usize) -> Self { + impl ID for $x { + fn new(x: usize) -> Self { $x(x as u32) } - pub fn idx(&self) -> usize { + fn idx(&self) -> usize { self.0 as usize } } diff --git a/hercules_ir/src/lib.rs b/hercules_ir/src/lib.rs index 8dd42d6c13e2d6d230cda758bad5a9aaafe81b71..f7277cfa41226efef51bfd3361b36236f2916b8b 100644 --- a/hercules_ir/src/lib.rs +++ b/hercules_ir/src/lib.rs @@ -8,6 +8,7 @@ pub mod antideps; pub mod build; +pub mod callgraph; pub mod dataflow; pub mod def_use; pub mod dom; @@ -23,6 +24,7 @@ pub mod verify; pub use crate::antideps::*; pub use crate::build::*; +pub use crate::callgraph::*; pub use crate::dataflow::*; pub use crate::def_use::*; pub use crate::dom::*; diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs index 96e6edf0c70b874cc871fd7ff53e39c7e15138be..b830af98ffd22aaf7078bf92e6633f8003ff9415 100644 --- a/hercules_opt/src/editor.rs +++ b/hercules_opt/src/editor.rs @@ -258,6 +258,10 @@ impl<'a, 'b> FunctionEdit<'a, 'b> { } } + fn is_mutable(&self, id: NodeID) -> bool { + id.idx() >= self.editor.mutable_nodes.len() || self.editor.mutable_nodes[id.idx()] + } + pub fn add_node(&mut self, node: Node) -> NodeID { let id = NodeID::new(self.editor.function.nodes.len() + self.added_nodeids.len()); // Added nodes need to have an entry in the def-use map. @@ -277,7 +281,7 @@ impl<'a, 'b> FunctionEdit<'a, 'b> { pub fn delete_node(mut self, id: NodeID) -> Result<Self, Self> { // We can only delete mutable nodes. Return None if we try to modify an // immutable node, as it means the whole edit should be aborted. - if self.editor.mutable_nodes[id.idx()] { + if self.is_mutable(id) { assert!( !self.added_nodeids.contains(&id), "PANIC: Please don't delete a node that was added in the same edit." @@ -302,7 +306,7 @@ impl<'a, 'b> FunctionEdit<'a, 'b> { // We can only replace uses of mutable nodes. 
Return None if we try to // replace uses of an immutable node, as it means the whole edit should // be aborted. - if self.editor.mutable_nodes[old.idx()] { + if self.is_mutable(old) { // Update all of the users of the old node. self.ensure_updated_def_use_entry(old); for user_id in self.updated_def_use[&old].iter() { diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs new file mode 100644 index 0000000000000000000000000000000000000000..bd192c39772d3cf70e764ad5761b7465246acd78 --- /dev/null +++ b/hercules_opt/src/inline.rs @@ -0,0 +1,262 @@ +extern crate hercules_ir; + +use std::collections::HashMap; + +use self::hercules_ir::callgraph::*; +use self::hercules_ir::def_use::*; +use self::hercules_ir::ir::*; +use self::hercules_ir::schedule::*; + +use crate::*; + +/* + * Top level function to run inlining. Currently, inlines every function call, + * since mutual recursion is not valid in Hercules IR. + */ +pub fn inline( + editors: &mut [FunctionEditor], + callgraph: &CallGraph, + mut plans: Option<&mut Vec<Plan>>, +) { + // Step 1: run topological sort on the call graph to inline the "deepest" + // function first. Mutual recursion is not currently supported, so assert + // that a topological sort exists. + let mut num_calls: Vec<usize> = (0..editors.len()) + .map(|idx| callgraph.num_callees(FunctionID::new(idx))) + .collect(); + let mut no_calls_stack: Vec<FunctionID> = num_calls + .iter() + .enumerate() + .filter(|(_, num)| **num == 0) + .map(|(idx, _)| FunctionID::new(idx)) + .collect(); + let mut topo = vec![]; + while let Some(no_call_func) = no_calls_stack.pop() { + topo.push(no_call_func); + for caller in callgraph.get_callers(no_call_func) { + num_calls[caller.idx()] -= 1; + if num_calls[caller.idx()] == 0 { + no_calls_stack.push(*caller); + } + } + } + assert_eq!( + topo.len(), + editors.len(), + "PANIC: Found mutual recursion in Hercules IR." + ); + + // Step 2: make sure each function has a single return node. 
If an edit + // failed to make a function have a single return node, then we can't inline + // calls of it. + let single_return_nodes: Vec<_> = editors + .iter_mut() + .map(|editor| collapse_returns(editor)) + .collect(); + + // Step 3: run inlining on each function individually. Iterate the functions + // in topological order. + for to_inline_id in topo { + // Since Rust cannot analyze the accesses into an array of mutable + // references, we need to do some weirdness here to simultaneously get: + // 1. A mutable reference to the function we're modifying. + // 2. Shared references to all of the functions called by that function. + // We need to get the same for plans, if we receive them. + let callees = callgraph.get_callees(to_inline_id); + let editor_refs = get_mut_and_immuts(editors, to_inline_id, callees); + let plan_refs = plans + .as_mut() + .map(|plans| get_mut_and_immuts(*plans, to_inline_id, callees)); + inline_func( + editor_refs.0, + editor_refs.1, + plan_refs, + &single_return_nodes, + ); + } +} + +/* + * Helper function to get from an array of mutable references: + * 1. A single mutable reference. + * 2. Several shared references. + * Where none of the references alias. We need to use this both for function + * editors and plans. 
+ */ +fn get_mut_and_immuts<'a, T, I: ID>( + mut_refs: &'a mut [T], + mut_id: I, + shared_id: &[I], +) -> (&'a mut T, HashMap<I, &'a T>) { + let mut all_id = Vec::from(shared_id); + all_id.sort_unstable(); + all_id.insert(all_id.binary_search(&mut_id).unwrap_err(), mut_id); + let mut mut_ref = None; + let mut shared_refs = HashMap::new(); + let mut cursor = 0; + let mut slice = &mut *mut_refs; + for id in all_id { + let (left, right) = slice.split_at_mut(id.idx() - cursor); + cursor += left.len() + 1; + let (left, right) = right.split_at_mut(1); + let item = &mut left[0]; + if id == mut_id { + assert!(mut_ref.is_none()); + mut_ref = Some(item); + } else { + shared_refs.insert(id, &*item); + } + slice = right; + } + + (mut_ref.unwrap(), shared_refs) +} + +/* + * Run inlining on a single function. Pass a mutable reference to the function + * to modify and shared references for all called functions. + */ +fn inline_func( + editor: &mut FunctionEditor, + called: HashMap<FunctionID, &FunctionEditor>, + plans: Option<(&mut Plan, HashMap<FunctionID, &Plan>)>, + single_return_nodes: &Vec<Option<NodeID>>, +) { + let first_num_nodes = editor.func().nodes.len(); + for id in (0..first_num_nodes).map(NodeID::new) { + // Break down the call node. + let Node::Call { + control, + function, + ref dynamic_constants, + ref args, + } = editor.func().nodes[id.idx()] + else { + continue; + }; + + // Assemble all the info we'll need to do the edit. + let dcs = dynamic_constants.clone(); + assert!( + dcs.is_empty(), + "TODO: Implement inlining dynamic constant arguments." + ); + let args = args.clone(); + let old_num_nodes = editor.func().nodes.len(); + let old_id_to_new_id = |old_id: NodeID| NodeID::new(old_id.idx() + old_num_nodes); + let call_pred = get_uses(&editor.func().nodes[control.idx()]); + assert_eq!(call_pred.as_ref().len(), 1); + let call_pred = call_pred.as_ref()[0]; + let called_func = called[&function].func(); + // We can't inline calls to functions with multiple returns. 
+ let Some(called_return) = single_return_nodes[function.idx()] else { + continue; + }; + let called_return_uses = get_uses(&called_func.nodes[called_return.idx()]); + let called_return_pred = called_return_uses.as_ref()[0]; + let called_return_data = called_return_uses.as_ref()[1]; + + // Perform the actual edit. + let success = editor.edit(|mut edit| { + // Insert the nodes from the called function. There are a few + // special cases: + // - Start: don't add start nodes - later, we'll replace_all_uses on + // the start node with the one predecessor of the call's region + // node. + // - Parameter: don't add parameter nodes - later, we'll + // replace_all_uses on the parameter nodes with the arguments to + // the call node. + // - Return: don't add return nodes - later, we'll replace_all_uses + // on the call's region node with the predecessor to the return + // node. + for (idx, node) in called_func.nodes.iter().enumerate() { + if node.is_start() || node.is_parameter() || node.is_return() { + // We still need to add some node to make sure the IDs line + // up. Just add a gravestone. + edit.add_node(Node::Start); + continue; + } + // Get the node from the callee function and replace all the + // uses with the to-be IDs in the caller function. + let mut node = node.clone(); + let mut uses = get_uses_mut(&mut node); + for u in uses.as_mut() { + **u = old_id_to_new_id(**u); + } + // Add the node and check that the IDs line up. + let add_id = edit.add_node(node); + assert_eq!(add_id, old_id_to_new_id(NodeID::new(idx))); + } + + // Stitch the control use of the inlined start node with the + // predecessor control node of the call's region. + let start_node = &called_func.nodes[0]; + assert!(start_node.is_start()); + let start_id = old_id_to_new_id(NodeID::new(0)); + edit = edit.replace_all_uses(start_id, call_pred)?; + + // Stitch the control use of the original call node's region with + // the predecessor control of the inlined function's return. 
+ edit = edit.replace_all_uses(control, called_return_pred)?; + + // Stitch uses of parameter nodes in the inlined function to the IDs + // of arguments provided to the call node. + for (node_idx, node) in called_func.nodes.iter().enumerate() { + if let Node::Parameter { index } = node { + let param_id = old_id_to_new_id(NodeID::new(node_idx)); + edit = edit.replace_all_uses(param_id, args[*index])?; + } + } + + // Finally, delete the call node. + edit = edit.replace_all_uses(id, old_id_to_new_id(called_return_data))?; + edit = edit.delete_node(control)?; + edit = edit.delete_node(id)?; + + Ok(edit) + }); + } +} + +/* + * Top level function to make a function have only a single return. + */ +pub fn collapse_returns(editor: &mut FunctionEditor) -> Option<NodeID> { + let returns: Vec<NodeID> = (0..editor.func().nodes.len()) + .filter(|idx| editor.func().nodes[*idx].is_return()) + .map(NodeID::new) + .collect(); + assert!(!returns.is_empty()); + if returns.len() == 1 { + return Some(returns[0]); + } + let preds_before_returns: Vec<NodeID> = returns + .iter() + .map(|ret_id| get_uses(&editor.func().nodes[ret_id.idx()]).as_ref()[0]) + .collect(); + let data_to_return: Vec<NodeID> = returns + .iter() + .map(|ret_id| get_uses(&editor.func().nodes[ret_id.idx()]).as_ref()[1]) + .collect(); + + // All of the old returns get replaced in a single edit. 
+ let mut new_return = None; + editor.edit(|mut edit| { + let region = edit.add_node(Node::Region { + preds: preds_before_returns.into_boxed_slice(), + }); + let phi = edit.add_node(Node::Phi { + control: region, + data: data_to_return.into_boxed_slice(), + }); + for ret in returns { + edit = edit.delete_node(ret)?; + } + new_return = Some(edit.add_node(Node::Return { + control: region, + data: phi, + })); + Ok(edit) + }); + new_return +} diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs index ff789dd2da1648fee29796871505a9f4fd642dc1..bc2e868784e56e6689bca008637f6f4781788cc1 100644 --- a/hercules_opt/src/lib.rs +++ b/hercules_opt/src/lib.rs @@ -6,6 +6,7 @@ pub mod editor; pub mod fork_guard_elim; pub mod forkify; pub mod gvn; +pub mod inline; pub mod pass; pub mod phi_elim; pub mod pred; @@ -17,6 +18,7 @@ pub use crate::editor::*; pub use crate::fork_guard_elim::*; pub use crate::forkify::*; pub use crate::gvn::*; +pub use crate::inline::*; pub use crate::pass::*; pub use crate::phi_elim::*; pub use crate::pred::*; diff --git a/hercules_opt/src/pass.rs b/hercules_opt/src/pass.rs index 5db24e3027dd8d6d5928262b91ac9b893ce19485..8161716984913a53d0942ba7bc9c5975403583e9 100644 --- a/hercules_opt/src/pass.rs +++ b/hercules_opt/src/pass.rs @@ -32,6 +32,7 @@ pub enum Pass { ForkGuardElim, Predication, SROA, + Inline, Verify, // Parameterized over whether analyses that aid visualization are necessary. // Useful to set to false if displaying a potentially broken module. @@ -66,6 +67,7 @@ pub struct PassManager { pub loops: Option<Vec<LoopTree>>, pub antideps: Option<Vec<Vec<(NodeID, NodeID)>>>, pub bbs: Option<Vec<Vec<NodeID>>>, + pub callgraph: Option<CallGraph>, // Current plan. 
pub plans: Option<Vec<Plan>>, @@ -90,6 +92,7 @@ impl PassManager { loops: None, antideps: None, bbs: None, + callgraph: None, plans: None, manifests: None, } @@ -280,6 +283,12 @@ impl PassManager { } } + pub fn make_callgraph(&mut self) { + if self.callgraph.is_none() { + self.callgraph = Some(callgraph(&self.module)); + } + } + pub fn set_plans(&mut self, plans: Vec<Plan>) { self.plans = Some(plans); } @@ -497,6 +506,45 @@ impl PassManager { self.legacy_repair_plan(); self.clear_analyses(); } + Pass::Inline => { + self.make_def_uses(); + self.make_callgraph(); + let def_uses = self.def_uses.as_ref().unwrap(); + let callgraph = self.callgraph.as_ref().unwrap(); + let constants_ref = RefCell::new(std::mem::take(&mut self.module.constants)); + let dynamic_constants_ref = + RefCell::new(std::mem::take(&mut self.module.dynamic_constants)); + let types_ref = RefCell::new(std::mem::take(&mut self.module.types)); + let mut editors: Vec<_> = + zip(self.module.functions.iter_mut(), def_uses.iter()) + .map(|(func, def_use)| { + FunctionEditor::new( + func, + &constants_ref, + &dynamic_constants_ref, + &types_ref, + def_use, + ) + }) + .collect(); + // Inlining is special in that it may modify partitions in an + // inter-procedural fashion. 
+ inline(&mut editors, callgraph, self.plans.as_mut()); + self.module.constants = constants_ref.take(); + self.module.dynamic_constants = dynamic_constants_ref.take(); + self.module.types = types_ref.take(); + let edits: Vec<_> = editors.into_iter().map(|editor| editor.edits()).collect(); + for idx in 0..edits.len() { + if let Some(plans) = self.plans.as_mut() { + repair_plan(&mut plans[idx], &self.module.functions[idx], &edits[idx]); + } + let grave_mapping = self.module.functions[idx].delete_gravestones(); + if let Some(plans) = self.plans.as_mut() { + plans[idx].fix_gravestones(&grave_mapping); + } + } + self.clear_analyses(); + } Pass::Verify => { let ( def_uses, @@ -679,6 +727,7 @@ impl PassManager { self.loops = None; self.antideps = None; self.bbs = None; + self.callgraph = None; // Don't clear the plan - this is repaired, not reconstructed. }