diff --git a/hercules_ir/src/dot.rs b/hercules_ir/src/dot.rs index 8efabd7a99157b9bb8b40e5abe76d2d42513bb79..5ccda9dc6bf39c016ba44b6eb1085679288d5c54 100644 --- a/hercules_ir/src/dot.rs +++ b/hercules_ir/src/dot.rs @@ -16,6 +16,7 @@ use crate::*; pub fn xdot_module( module: &ir::Module, reverse_postorders: &Vec<Vec<NodeID>>, + typing: Option<&ModuleTyping>, doms: Option<&Vec<DomTree>>, fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>, devices: Option<&Vec<Device>>, @@ -30,6 +31,7 @@ pub fn xdot_module( write_dot( &module, &reverse_postorders, + typing, doms, fork_join_maps, devices, @@ -53,6 +55,7 @@ pub fn xdot_module( pub fn write_dot<W: Write>( module: &ir::Module, reverse_postorders: &Vec<Vec<NodeID>>, + typing: Option<&ModuleTyping>, doms: Option<&Vec<DomTree>>, fork_join_maps: Option<&Vec<HashMap<NodeID, NodeID>>>, devices: Option<&Vec<Device>>, @@ -89,6 +92,7 @@ pub fn write_dot<W: Write>( function_id, color, module, + typing, &function.schedules[node_id.idx()], w, )?; @@ -249,6 +253,7 @@ fn write_node<W: Write>( function_id: FunctionID, color: &str, module: &Module, + typing: Option<&ModuleTyping>, schedules: &Vec<Schedule>, w: &mut W, ) -> std::fmt::Result { @@ -331,30 +336,37 @@ fn write_node<W: Write>( } else { format!("{} ({})", node.upper_case_name(), suffix) }; + let xlabel = format!("{}", node_id.idx()); + let mut tylabel = String::new(); + if let Some(ty) = typing.map(|typing| typing[function_id.idx()][node_id.idx()]) { + module.write_type(ty, &mut tylabel)?; + } let mut iter = schedules.into_iter(); if let Some(first) = iter.next() { - let subtitle = iter.fold(format!("{:?}", first), |b, i| format!("{}, {:?}", b, i)); + let schedules = iter.fold(format!("{:?}", first), |b, i| format!("{}, {:?}", b, i)); write!( w, - "{}_{}_{} [xlabel={}, label=<{}<BR /><FONT POINT-SIZE=\"8\">{}</FONT>>, color={}];\n", + "{}_{}_{} [xlabel={}, label=<{}<BR /><FONT POINT-SIZE=\"8\">{}</FONT><BR /><FONT POINT-SIZE=\"8\">{}</FONT>>, color={}];\n", node.lower_case_name(), function_id.idx(), node_id.idx(), - node_id.idx(), + xlabel, label, - subtitle, + tylabel, + schedules, color )?; } else { write!( w, - "{}_{}_{} [xlabel={}, label=\"{}\", color={}];\n", + "{}_{}_{} [xlabel={}, label=<{}<BR /><FONT POINT-SIZE=\"8\">{}</FONT>>, color={}];\n", node.lower_case_name(), function_id.idx(), node_id.idx(), - node_id.idx(), + xlabel, label, + tylabel, color )?; } diff --git a/hercules_ir/src/ir.rs b/hercules_ir/src/ir.rs index f62c00c15f9e8715254c515834d8fe63c2715539..bf7806dcc371112419bbf7e21ef3b206df3a2e32 100644 --- a/hercules_ir/src/ir.rs +++ b/hercules_ir/src/ir.rs @@ -628,6 +628,7 @@ pub fn dynamic_constants_bottom_up( dynamic_constants: &Vec<DynamicConstant>, ) -> impl Iterator<Item = DynamicConstantID> + '_ { let mut visited = bitvec![u8, Lsb0; 0; dynamic_constants.len()]; + let mut invalid = bitvec![u8, Lsb0; 0; dynamic_constants.len()]; let mut stack = (0..dynamic_constants.len()) .map(DynamicConstantID::new) .collect::<Vec<DynamicConstantID>>(); @@ -647,13 +648,16 @@ pub fn dynamic_constants_bottom_up( // We have to yield the children of this node before // this node itself. We keep track of which nodes have // yielded using visited. - if left.idx() >= visited.len() || right.idx() >= visited.len() { + if left.idx() >= visited.len() + || right.idx() >= visited.len() + || invalid[left.idx()] + || invalid[right.idx()] + { // This is an invalid dynamic constant and should be // skipped. + invalid.set(id.idx(), true); continue; - } - let can_yield = visited[left.idx()] && visited[right.idx()]; - if can_yield { + } else if visited[left.idx()] && visited[right.idx()] { visited.set(id.idx(), true); yield id; } else { diff --git a/hercules_opt/src/device_placement.rs b/hercules_opt/src/device_placement.rs deleted file mode 100644 index 2badd69df428db12a0f3a46ac4aee05f8154d171..0000000000000000000000000000000000000000 --- a/hercules_opt/src/device_placement.rs +++ /dev/null @@ -1,3 +0,0 @@ -use hercules_ir::ir::*; - -use crate::*; diff --git a/hercules_opt/src/editor.rs b/hercules_opt/src/editor.rs index f6a00c8582b8289062d353bc06a5b65e32daac19..39f1184cc947a35418641a817a86321343f101fc 100644 --- a/hercules_opt/src/editor.rs +++ b/hercules_opt/src/editor.rs @@ -1,11 +1,9 @@ use std::borrow::Borrow; use std::cell::{Ref, RefCell}; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::mem::take; use std::ops::Deref; -use nestify::nest; - use bitvec::prelude::*; use either::Either; @@ -156,10 +154,6 @@ impl<'a: 'b, 'b> FunctionEditor<'a> { self.modified } - pub fn node(&self, node: impl Borrow<NodeID>) -> &Node { - &self.function.nodes[node.borrow().idx()] - } - pub fn edit<F>(&'b mut self, edit: F) -> bool where F: FnOnce(FunctionEdit<'a, 'b>) -> Result<FunctionEdit<'a, 'b>, FunctionEdit<'a, 'b>>, @@ -342,6 +336,10 @@ impl<'a: 'b, 'b> FunctionEditor<'a> { self.function_id } + pub fn node(&self, node: impl Borrow<NodeID>) -> &Node { + &self.function.nodes[node.borrow().idx()] + } + pub fn get_types(&self) -> Ref<'_, Vec<Type>> { self.types.borrow() } @@ -363,7 +361,7 @@ impl<'a: 'b, 'b> FunctionEditor<'a> { .as_ref() .into_iter() .map(|x| *x) - .collect::<Vec<_>>() // @(xrouth): wtf??? + .collect::<Vec<_>>() .into_iter() } diff --git a/hercules_opt/src/fork_concat_split.rs b/hercules_opt/src/fork_concat_split.rs index 1339a38436bcf1db5a613d3cb121d1f67d612a2e..bb3a2cff556077d2bf3fe54a7fa21d0dd6d4e4b9 100644 --- a/hercules_opt/src/fork_concat_split.rs +++ b/hercules_opt/src/fork_concat_split.rs @@ -7,7 +7,8 @@ use crate::*; /* * Split multi-dimensional fork-joins into separate one-dimensional fork-joins. - * Useful for code generation. + * Useful for code generation. A single iteration of `fork_split` only splits + * at most one fork-join, it must be called repeatedly to split all fork-joins. */ pub fn fork_split( editor: &mut FunctionEditor, diff --git a/hercules_opt/src/fork_guard_elim.rs b/hercules_opt/src/fork_guard_elim.rs index 435e63b6eb0a2b8cc91adbb50451a0caddd2b16a..1abb89672ae1d5c4f0f34578ca9d8eb2d69a2bc0 100644 --- a/hercules_opt/src/fork_guard_elim.rs +++ b/hercules_opt/src/fork_guard_elim.rs @@ -1,11 +1,8 @@ use std::collections::{HashMap, HashSet}; -use either::Either; -use hercules_ir::get_uses_mut; -use hercules_ir::ir::*; -use hercules_ir::ImmutableDefUseMap; +use hercules_ir::*; -use crate::FunctionEditor; +use crate::*; /* * This is a Hercules IR transformation that: @@ -20,31 +17,17 @@ use crate::FunctionEditor; * guard remains and in these cases the guard is no longer needed. */ -/* Given a node index and the node itself, return None if the node is not - * a guarded fork where we can eliminate the guard. - * If the node is a fork with a guard we can eliminate returns a tuple of - * - This node's NodeID - * - The replication factor of the fork - * - The ID of the if of the guard - * - The ID of the projections of the if - * - The guard's predecessor - * - A map of NodeIDs for the phi nodes to the reduce they should be replaced - * with, and also the region that joins the guard's branches mapping to the - * fork's join NodeID - * - If the replication factor is a max that can be eliminated. - */ - // Simplify factors through max enum Factor { Max(usize, DynamicConstantID), - Normal(usize, DynamicConstantID), + Normal(DynamicConstantID), } impl Factor { fn get_id(&self) -> DynamicConstantID { match self { Factor::Max(_, dynamic_constant_id) => *dynamic_constant_id, - Factor::Normal(_, dynamic_constant_id) => *dynamic_constant_id, + Factor::Normal(dynamic_constant_id) => *dynamic_constant_id, } } } @@ -61,6 +44,19 @@ struct GuardedFork { factor: Factor, // The factor that matches the guard } +/* Given a node index and the node itself, return None if the node is not + * a guarded fork where we can eliminate the guard. + * If the node is a fork with a guard we can eliminate returns a tuple of + * - This node's NodeID + * - The replication factor of the fork + * - The ID of the if of the guard + * - The ID of the projections of the if + * - The guard's predecessor + * - A map of NodeIDs for the phi nodes to the reduce they should be replaced + * with, and also the region that joins the guard's branches mapping to the + * fork's join NodeID + * - If the replication factor is a max that can be eliminated. + */ fn guarded_fork( editor: &mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>, @@ -73,10 +69,9 @@ fn guarded_fork( return None; }; - let factors = factors.iter().enumerate().map(|(idx, dc)| { - // FIXME: Can we hide .idx() in an impl Index or something so we don't index Vec<Nodes> iwht DynamicConstantId.idx() + let mut factors = factors.iter().enumerate().map(|(idx, dc)| { let DynamicConstant::Max(l, r) = *editor.get_dynamic_constant(*dc) else { - return Factor::Normal(idx, *dc); + return Factor::Normal(*dc); }; // There really needs to be a better way to work w/ associativity. @@ -90,7 +85,7 @@ fn guarded_fork( match id { Some(v) => Factor::Max(idx, *v), - None => Factor::Normal(idx, *dc), + None => Factor::Normal(*dc), } }); @@ -140,24 +135,22 @@ fn guarded_fork( } // Match Factor - let factor = factors.clone().find(|factor| { - // This clone on the dc is painful. + let factor = factors.find(|factor| { match ( &function.nodes[pattern_factor.idx()], - editor.get_dynamic_constant(factor.get_id()).clone(), + &*editor.get_dynamic_constant(factor.get_id()), ) { (Node::Constant { id }, DynamicConstant::Constant(v)) => { let Constant::UnsignedInteger64(pattern_v) = *editor.get_constant(*id) else { return false; }; - pattern_v == (v as u64) + pattern_v == (*v as u64) } (Node::DynamicConstant { id }, _) => *id == factor.get_id(), _ => false, } }); - // return Factor factor }) } @@ -184,12 +177,10 @@ fn guarded_fork( } // Match Factor - // FIXME: Implement dc / constant matching as in case where branch_idx == 1 - let factor = factors.clone().find(|factor| { + let factor = factors.find(|factor| { function.nodes[pattern_factor.idx()].try_dynamic_constant() == Some(factor.get_id()) }); - // return Factor factor }) } else { @@ -229,7 +220,7 @@ fn guarded_fork( } else { return None; }; - // Other predecessor needs to be the other read from the guard's if + // Other predecessor needs to be the other projection from the guard's if let Node::Projection { control: if_node2, ref selection, @@ -317,8 +308,6 @@ fn guarded_fork( /* * Top level function to run fork guard elimination, as described above. - * Deletes nodes by setting nodes to gravestones. Works with a function already - * containing gravestones. */ pub fn fork_guard_elim(editor: &mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>) { let guard_info = editor @@ -340,7 +329,7 @@ pub fn fork_guard_elim(editor: &mut FunctionEditor, fork_join_map: &HashMap<Node { let new_fork_info = if let Factor::Max(idx, dc) = factor { let Node::Fork { - control, + control: _, mut factors, } = editor.func().nodes[fork.idx()].clone() else { diff --git a/hercules_opt/src/fork_transforms.rs b/hercules_opt/src/fork_transforms.rs index edf26911aa4a43854842d986d794638dacdc7d5a..a4605bec7824255b0098cafabb50ed5446773947 100644 --- a/hercules_opt/src/fork_transforms.rs +++ b/hercules_opt/src/fork_transforms.rs @@ -1,32 +1,18 @@ use std::collections::{HashMap, HashSet}; -use std::ops::Sub; - -use itertools::Itertools; use bimap::BiMap; +use itertools::Itertools; -use hercules_ir::LoopTree; - -use hercules_ir::{Index, TypeID}; - -use hercules_ir::Subgraph; - -use hercules_ir::DynamicConstantID; - -use hercules_ir::Node; - -use hercules_ir::{get_uses, Function}; - -use hercules_ir::{NodeID, ID}; +use hercules_ir::*; -use crate::{DenseNodeMap, FunctionEditor, Loop, SparseNodeMap}; +use crate::*; type ForkID = usize; /** Places each reduce node into its own fork */ pub fn default_reduce_partition( editor: &FunctionEditor, - fork: NodeID, + _fork: NodeID, join: NodeID, ) -> SparseNodeMap<ForkID> { let mut map = SparseNodeMap::new(); @@ -107,7 +93,7 @@ pub fn find_reduce_dependencies<'a>( }) .collect(); - ret_val + ret_val } pub fn copy_subgraph( @@ -119,7 +105,7 @@ pub fn copy_subgraph( Vec<(NodeID, NodeID)>, ) // returns all new nodes, a map from old nodes to new nodes, and // a vec of pairs of nodes (old node, outside node) s.t old node -> outside node, - // outside means not part of the original subgraph. + // outside means not part of the original subgraph. { let mut map: HashMap<NodeID, NodeID> = HashMap::new(); let mut new_nodes: HashSet<NodeID> = HashSet::new(); @@ -158,9 +144,9 @@ pub fn copy_subgraph( pub fn fork_fission<'a>( editor: &'a mut FunctionEditor, - control_subgraph: &Subgraph, - types: &Vec<TypeID>, - loop_tree: &LoopTree, + _control_subgraph: &Subgraph, + _types: &Vec<TypeID>, + _loop_tree: &LoopTree, fork_join_map: &HashMap<NodeID, NodeID>, ) -> () { let forks: Vec<_> = editor @@ -177,7 +163,7 @@ pub fn fork_fission<'a>( }) .collect(); - let mut control_pred = NodeID::new(0); + let control_pred = NodeID::new(0); // This does the reduction fission: for fork in forks.clone() { @@ -190,10 +176,7 @@ pub fn fork_fission<'a>( // inner control in general *should* work right now without modifications. } let reduce_partition = default_reduce_partition(editor, fork, join); - - let (new_fork, new_join) = - fork_reduce_fission_helper(editor, fork_join_map, reduce_partition, control_pred, fork); - // control_pred = new_join; + fork_reduce_fission_helper(editor, fork_join_map, reduce_partition, control_pred, fork); } } @@ -202,7 +185,7 @@ pub fn fork_bufferize_fission_helper<'a>( editor: &'a mut FunctionEditor, fork_join_map: &HashMap<NodeID, NodeID>, bufferized_edges: HashSet<(NodeID, NodeID)>, // Describes what intermediate data should be bufferized. - original_control_pred: NodeID, // What the new fork connects to. + _original_control_pred: NodeID, // What the new fork connects to. types: &Vec<TypeID>, fork: NodeID, ) -> (NodeID, NodeID) { @@ -225,7 +208,7 @@ pub fn fork_bufferize_fission_helper<'a>( editor.edit(|mut edit| { new_join_id = edit.add_node(Node::Join { control: fork }); - let factors = edit.get_node(fork).try_fork().unwrap().1.clone(); + let factors = edit.get_node(fork).try_fork().unwrap().1; new_fork_id = edit.add_node(Node::Fork { control: new_join_id, factors: factors.into(), @@ -248,14 +231,14 @@ pub fn fork_bufferize_fission_helper<'a>( let thread_stuff_it = factors.into_iter().enumerate(); // FIxme: try to use unzip here? Idk why it wasn't working. - let (tids) = thread_stuff_it.clone().map(|(dim, factor)| { - (edit.add_node(Node::ThreadID { + let tids = thread_stuff_it.clone().map(|(dim, _)| { + edit.add_node(Node::ThreadID { control: fork, dimension: dim, - })) + }) }); - let array_dims = thread_stuff_it.clone().map(|(dim, factor)| (factor)); + let array_dims = thread_stuff_it.clone().map(|(_, factor)| (factor)); // Assume 1-d fork only for now. // let tid = edit.add_node(Node::ThreadID { control: fork, dimension: 0 }); @@ -282,11 +265,11 @@ pub fn fork_bufferize_fission_helper<'a>( edit = edit.replace_all_uses_where(NodeID::new(0), reduce, |usee| *usee == write)?; // Create read from buffer - let (tids) = thread_stuff_it.clone().map(|(dim, factor)| { - (edit.add_node(Node::ThreadID { + let tids = thread_stuff_it.clone().map(|(dim, _)| { + edit.add_node(Node::ThreadID { control: new_fork_id, dimension: dim, - })) + }) }); let position_idx = Index::Position(tids.collect::<Vec<_>>().into_boxed_slice()); @@ -341,19 +324,14 @@ pub fn fork_reduce_fission_helper<'a>( subgraph.insert(fork); subgraph.insert(reduce); - // println!("subgraph for {:?}: \n{:?}", reduce, subgraph); - - let (new_nodes, mapping, _) = copy_subgraph(editor, subgraph); - - // println!("new_nodes: {:?} ", new_nodes); - // println!("mapping: {:?} ",mapping); + let (_, mapping, _) = copy_subgraph(editor, subgraph); new_fork = mapping[&fork]; new_join = mapping[&join]; editor.edit(|mut edit| { // Atttach new_fork after control_pred - let (old_control_pred, factors) = edit.get_node(new_fork).try_fork().unwrap().clone(); + let (old_control_pred, _) = edit.get_node(new_fork).try_fork().unwrap().clone(); edit = edit.replace_all_uses_where(old_control_pred, new_control_pred, |usee| { *usee == new_fork })?; @@ -395,7 +373,7 @@ pub fn fork_coalesce( }); let fork_joins: Vec<_> = fork_joins.collect(); - // FIXME: Add a postorder traversal to optimize this. + // FIXME: Add a postorder traversal to optimize this. // FIXME: This could give us two forks that aren't actually ancestors / related, but then the helper will just return false early. // something like: `fork_joins.postorder_iter().windows(2)` is ideal here. @@ -430,7 +408,7 @@ pub fn fork_coalesce_helper( .filter(|node| editor.func().nodes[node.idx()].is_reduce()) { // check that inner reduce is of the inner join - let (outer_control, outer_init, outer_reduct) = editor.func().nodes[outer_reduce.idx()] + let (_, _, outer_reduct) = editor.func().nodes[outer_reduce.idx()] .try_reduce() .unwrap(); @@ -440,7 +418,7 @@ pub fn fork_coalesce_helper( let Node::Reduce { control: inner_control, init: inner_init, - reduct: inner_reduct, + reduct: _, } = inner_reduce_node else { return false; @@ -486,7 +464,7 @@ pub fn fork_coalesce_helper( return false; } - // Checklist: + // Checklist: // Increment inner TIDs // Add outer fork's dimension to front of inner fork. // Fuse reductions @@ -524,10 +502,10 @@ pub fn fork_coalesce_helper( // Fuse Reductions for (outer_reduce, inner_reduce) in pairs { - let (outer_control, outer_init, outer_reduct) = editor.func().nodes[outer_reduce.idx()] + let (_, outer_init, _) = editor.func().nodes[outer_reduce.idx()] .try_reduce() .unwrap(); - let (inner_control, inner_init, inner_reduct) = editor.func().nodes[inner_reduce.idx()] + let (_, inner_init, _) = editor.func().nodes[inner_reduce.idx()] .try_reduce() .unwrap(); editor.edit(|mut edit| { diff --git a/hercules_opt/src/forkify.rs b/hercules_opt/src/forkify.rs index 49ba98a652acc51632c5b5d6d3705393dd642bd5..ce9ac1412f1253bff6589ec668db63725183ca6c 100644 --- a/hercules_opt/src/forkify.rs +++ b/hercules_opt/src/forkify.rs @@ -1,40 +1,20 @@ -use core::panic; use std::collections::HashMap; use std::collections::HashSet; use std::iter::zip; use std::iter::FromIterator; +use itertools::Itertools; use nestify::nest; -use bitvec::order::Lsb0; -use bitvec::vec::BitVec; - -use hercules_ir::Subgraph; - -use hercules_ir::control_subgraph; - -use crate::calculate_loop_nodes; -use crate::compute_induction_vars; -use crate::compute_iv_ranges; -use crate::compute_loop_variance; -use crate::get_loop_exit_conditions; -use crate::has_canonical_iv; -use crate::walk_all_users; -use crate::walk_all_users_stop_on; -use crate::walk_all_uses; -use crate::walk_all_uses_stop_on; -use crate::DenseNodeMap; -use crate::FunctionEditor; -use crate::InductionVariable; -use crate::Loop; -use crate::LoopExit; -use crate::LoopVarianceInfo; - -use hercules_ir::def_use::*; -use hercules_ir::ir::*; -use hercules_ir::loops::*; -use itertools::Itertools; +use hercules_ir::*; + +use crate::*; +/* + * TODO: Forkify currently makes a bunch of small edits - this needs to be + * changed so that every loop that gets forkified corresponds to a single edit + * + sub-edits. This would allow us to run forkify on a subset of a function. + */ pub fn forkify( editor: &mut FunctionEditor, control_subgraph: &Subgraph, @@ -101,23 +81,6 @@ pub fn get_node_as_dc( } } -fn all_same_variant<I, T>(mut iter: I) -> bool -where - I: Iterator<Item = T>, -{ - // Empty iterator case - return true - let first = match iter.next() { - None => return true, - Some(val) => val, - }; - - // Get discriminant of first item - let first_discriminant = std::mem::discriminant(&first); - - // Check all remaining items have same discriminant - iter.all(|x| std::mem::discriminant(&x) == first_discriminant) -} - /** Top level function to convert natural loops with simple induction variables into fork-joins. @@ -125,7 +88,7 @@ where pub fn forkify_loop( editor: &mut FunctionEditor, control_subgraph: &Subgraph, - fork_join_map: &HashMap<NodeID, NodeID>, + _fork_join_map: &HashMap<NodeID, NodeID>, l: &Loop, ) -> bool { let function = editor.func(); @@ -155,14 +118,14 @@ pub fn forkify_loop( // Get bound let bound = match canonical_iv { InductionVariable::Basic { - node, - initializer, - update, + node: _, + initializer: _, + update: _, final_value, } => final_value .map(|final_value| get_node_as_dc(editor, final_value)) .and_then(|r| r.ok()), - InductionVariable::SCEV(node_id) => return false, + InductionVariable::SCEV(_) => return false, }; let Some(bound_dc_id) = bound else { @@ -219,8 +182,6 @@ pub fn forkify_loop( .into_iter() .collect(); - let function = editor.func(); - // TODO: Handle multiple loop body lasts. // If there are multiple candidates for loop body last, return false. if editor @@ -241,35 +202,52 @@ pub fn forkify_loop( return false; } - let phi_latches: Vec<_> = reductionable_phis.iter().map(|phi| { - let LoopPHI::Reductionable { phi, data_cycle, continue_latch, is_associative } = phi else {unreachable!()}; - continue_latch - }).collect(); + let phi_latches: Vec<_> = reductionable_phis + .iter() + .map(|phi| { + let LoopPHI::Reductionable { + phi: _, + data_cycle: _, + continue_latch, + is_associative: _, + } = phi + else { + unreachable!() + }; + continue_latch + }) + .collect(); - let stop_on: HashSet<_> = editor.node_ids().filter(|node| { - if editor.node(node).is_phi() { - return true; - } - if editor.node(node).is_reduce() { - return true; - } - if editor.node(node).is_control() { - return true; - } - if phi_latches.contains(&node) { - return true; - } + let stop_on: HashSet<_> = editor + .node_ids() + .filter(|node| { + if editor.node(node).is_phi() { + return true; + } + if editor.node(node).is_reduce() { + return true; + } + if editor.node(node).is_control() { + return true; + } + if phi_latches.contains(&node) { + return true; + } + + false + }) + .collect(); - false - }).collect(); - - // Outside loop users of IV, then exit; - // Unless the outside user is through the loop latch of a reducing phi, + // Unless the outside user is through the loop latch of a reducing phi, // then we know how to replace this edge, so its fine! - let iv_users: Vec<_> = walk_all_users_stop_on(canonical_iv.phi(), editor, stop_on.clone()).collect(); - - if iv_users.iter().any(|node| !loop_nodes.contains(&node) && *node != loop_if) { + let iv_users: Vec<_> = + walk_all_users_stop_on(canonical_iv.phi(), editor, stop_on.clone()).collect(); + + if iv_users + .iter() + .any(|node| !loop_nodes.contains(&node) && *node != loop_if) + { return false; } @@ -345,9 +323,9 @@ pub fn forkify_loop( for reduction_phi in reductionable_phis { let LoopPHI::Reductionable { phi, - data_cycle, + data_cycle: _, continue_latch, - is_associative, + is_associative: _, } = reduction_phi else { panic!(); @@ -381,11 +359,11 @@ pub fn forkify_loop( } // Replace all uses of the loop header with the fork - editor.edit(|mut edit| edit.replace_all_uses(l.header, fork_id)); + editor.edit(|edit| edit.replace_all_uses(l.header, fork_id)); - editor.edit(|mut edit| edit.replace_all_uses(loop_continue_projection, fork_id)); + editor.edit(|edit| edit.replace_all_uses(loop_continue_projection, fork_id)); - editor.edit(|mut edit| edit.replace_all_uses(loop_exit_projection, join_id)); + editor.edit(|edit| edit.replace_all_uses(loop_exit_projection, join_id)); // Get rid of loop condition // DCE should get these, but delete them ourselves because we are nice :) @@ -418,9 +396,7 @@ nest! { impl LoopPHI { pub fn get_phi(&self) -> NodeID { match self { - LoopPHI::Reductionable { - phi, data_cycle, .. - } => *phi, + LoopPHI::Reductionable { phi, .. } => *phi, LoopPHI::LoopDependant(node_id) => *node_id, LoopPHI::UsedByDependant(node_id) => *node_id, } @@ -429,9 +405,9 @@ impl LoopPHI { /** Checks some conditions on loop variables that will need to be converted into reductions to be forkified. - - The phi is in a cycle *in the loop* with itself. + - The phi is in a cycle *in the loop* with itself. - Every cycle *in the loop* containing the phi does not contain any other phi of the loop header. - - The phi does not immediatley (not blocked by another phi or another reduce) use any other phis of the loop header. + - The phi does not immediatley (not blocked by another phi or another reduce) use any other phis of the loop header. */ pub fn analyze_phis<'a>( editor: &'a FunctionEditor, @@ -450,7 +426,7 @@ pub fn analyze_phis<'a>( let data = &editor.func().nodes[node.idx()]; // External Phi - if let Node::Phi { control, data } = data { + if let Node::Phi { control, data: _ } = data { if *control != natural_loop.header { return true; } @@ -464,8 +440,8 @@ pub fn analyze_phis<'a>( // External Reduce if let Node::Reduce { control, - init, - reduct, + init: _, + reduct: _, } = data { if !natural_loop.control[control.idx()] { @@ -483,7 +459,7 @@ pub fn analyze_phis<'a>( return false; }) .collect(); - + let continue_idx = editor .get_uses(natural_loop.header) .position(|node| natural_loop.control[node.idx()]) @@ -500,16 +476,11 @@ pub fn analyze_phis<'a>( let data = &editor.func().nodes[node.idx()]; // Phi, Reduce - if let Node::Phi { control, data } = data { + if data.is_phi() { return true; } - if let Node::Reduce { - control, - init, - reduct, - } = data - { + if data.is_reduce() { return true; } @@ -521,9 +492,9 @@ pub fn analyze_phis<'a>( return false; }) .collect(); - - let mut uses_for_dependance = walk_all_uses_stop_on(loop_continue_latch, editor, other_stop_on); + let mut uses_for_dependance = + walk_all_users_stop_on(loop_continue_latch, editor, other_stop_on); let set1: HashSet<_> = HashSet::from_iter(uses); let set2: HashSet<_> = HashSet::from_iter(users); @@ -534,16 +505,13 @@ pub fn analyze_phis<'a>( // we use `phis` because this phi can actually contain the loop iv and its fine. if uses_for_dependance.any(|node| phis.contains(&node) && node != *phi) { LoopPHI::LoopDependant(*phi) - } - else if intersection.clone().iter().any(|node| true) { - - + } else if intersection.clone().iter().next().is_some() { // PHIs on the frontier of the uses by the candidate phi, i.e in uses_for_dependance need // to have headers that postdominate the loop continue latch. The value of the PHI used needs to be defined // by the time the reduce is triggered (at the end of the loop's internal control). // No nodes in data cycles with this phi (in the loop) are used outside the loop, besides the loop_continue_latch. - // If some other node in the cycle is used, there is not a valid node to assign it after making the cycle a reduce. + // If some other node in the cycle is used, there is not a valid node to assign it after making the cycle a reduce. if intersection .iter() .filter(|node| **node != loop_continue_latch ) @@ -560,8 +528,8 @@ pub fn analyze_phis<'a>( return LoopPHI::LoopDependant(*phi); } - // FIXME: Do we want to calculate associativity here, there might be a case where this information is used in forkify - // i.e as described above. + // FIXME: Do we want to calculate associativity here, there might be a case where this information is used in forkify + // i.e as described above. let is_associative = false; // No nodes in the data cycle are used outside of the loop, besides the latched value of the phi diff --git a/hercules_opt/src/gcm.rs b/hercules_opt/src/gcm.rs index 0c7665bfe153dec310369575885074314eebad96..462d10871565bfed9b351d2627c44af8ca778ffc 100644 --- a/hercules_opt/src/gcm.rs +++ b/hercules_opt/src/gcm.rs @@ -1022,7 +1022,7 @@ fn liveness_dataflow( * device clones when a single node may potentially be on different devices. */ fn color_nodes( - editor: &mut FunctionEditor, + _editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, objects: &FunctionCollectionObjects, object_device_demands: &FunctionObjectDeviceDemands, @@ -1138,7 +1138,7 @@ fn object_allocation( typing: &Vec<TypeID>, node_colors: &FunctionNodeColors, alignments: &Vec<usize>, - liveness: &Liveness, + _liveness: &Liveness, backing_allocations: &BackingAllocations, ) -> FunctionBackingAllocation { let mut fba = BTreeMap::new(); @@ -1163,11 +1163,12 @@ fn object_allocation( Node::Call { control: _, function: callee, - dynamic_constants: _, + ref dynamic_constants, args: _, } => { + let dynamic_constants = dynamic_constants.clone(); for device in BACKED_DEVICES { - if let Some(callee_backing_size) = backing_allocations[&callee] + if let Some(mut callee_backing_size) = backing_allocations[&callee] .get(&device) .map(|(callee_total, _)| *callee_total) { @@ -1177,6 +1178,27 @@ fn object_allocation( // in the callee, so just assume the largest alignment. *total = align(&mut edit, *total, LARGEST_ALIGNMENT); offsets.insert(id, *total); + // Substitute the dynamic constant parameters in the + // callee's backing size. + let first_dc = edit.num_dynamic_constants() + 10000; + for (p_idx, dc_n) in zip(0..dynamic_constants.len(), first_dc..) { + let dc_a = + edit.add_dynamic_constant(DynamicConstant::Parameter(p_idx)); + callee_backing_size = substitute_dynamic_constants( + dc_a, + DynamicConstantID::new(dc_n), + callee_backing_size, + &mut edit, + ); + } + for (dc_n, dc_b) in zip(first_dc.., dynamic_constants.iter()) { + callee_backing_size = substitute_dynamic_constants( + DynamicConstantID::new(dc_n), + *dc_b, + callee_backing_size, + &mut edit, + ); + } *total = edit.add_dynamic_constant(DynamicConstant::Add( *total, callee_backing_size, diff --git a/hercules_opt/src/inline.rs b/hercules_opt/src/inline.rs index 064e3d73a1d9604ca5b284fe52b8c2e8c5a0339e..1d2bac97d848ace910d614a42743c6ea5fe3aa9e 100644 --- a/hercules_opt/src/inline.rs +++ b/hercules_opt/src/inline.rs @@ -179,7 +179,7 @@ fn inline_func( // as the new references we just made in the first step. We // actually want to institute all the updates // *simultaneously*, hence the two step maneuver. - let first_dc = edit.num_dynamic_constants() + 100; + let first_dc = edit.num_dynamic_constants() + 10000; for (dc_a, dc_n) in zip(dcs_a, first_dc..) { substitute_dynamic_constants_in_node( *dc_a, diff --git a/hercules_opt/src/interprocedural_sroa.rs b/hercules_opt/src/interprocedural_sroa.rs index 49fbcbbd712fad2977c64fd2ca1d9643bd95d74c..f597cd80347d94a7c927d6fe085d80f843e280eb 100644 --- a/hercules_opt/src/interprocedural_sroa.rs +++ b/hercules_opt/src/interprocedural_sroa.rs @@ -320,7 +320,7 @@ fn compress_return_products(editors: &mut Vec<FunctionEditor>, all_callsites_edi let mut substituted = old_return_type_ids[function_id.idx()]; assert_eq!(old_dcs.len(), new_dcs.len()); - let first_dc = edit.num_dynamic_constants() + 100; + let first_dc = edit.num_dynamic_constants() + 10000; for (dc_a, dc_n) in zip(old_dcs, first_dc..) { substituted = substitute_dynamic_constants_in_type( dc_a, @@ -424,7 +424,7 @@ fn remove_return_singletons(editors: &mut Vec<FunctionEditor>, all_callsites_edi if singleton_removed[function.idx()] { let edit_successful = editor.edit(|mut edit| { let mut substituted = old_return_type_ids[function.idx()]; - let first_dc = edit.num_dynamic_constants() + 100; + let first_dc = edit.num_dynamic_constants() + 10000; let dc_params: Vec<_> = (0..dc_args.len()) .map(|param_idx| { edit.add_dynamic_constant(DynamicConstant::Parameter(param_idx)) diff --git a/hercules_opt/src/ivar.rs b/hercules_opt/src/ivar.rs index 1f31e22088f170e2726241fc8796a3b496e81af3..f7252d29b66f9fc1882849206bbbf5b327a0f307 100644 --- a/hercules_opt/src/ivar.rs +++ b/hercules_opt/src/ivar.rs @@ -1,22 +1,9 @@ -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; -use std::path::Iter; +use std::collections::HashSet; -use nestify::nest; - -use hercules_ir::Subgraph; - -use bitvec::order::Lsb0; use bitvec::prelude::*; -use bitvec::vec::BitVec; -use hercules_ir::get_uses; - -use hercules_ir::LoopTree; - -use crate::walk_all_uses_stop_on; - -use slotmap::{new_key_type, SlotMap}; +use nestify::nest; -use hercules_ir::ir::*; +use hercules_ir::*; use crate::*; @@ -67,9 +54,9 @@ impl InductionVariable { match self { InductionVariable::Basic { node, - initializer, - update, - final_value, + initializer: _, + update: _, + final_value: _, } => *node, InductionVariable::SCEV(_) => todo!(), } @@ -85,7 +72,7 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has let data = &editor.func().nodes[node.idx()]; // External Phi - if let Node::Phi { control, data } = data { + if let Node::Phi { control, data: _ } = data { if !natural_loop.control[control.idx()] { return true; } @@ -93,8 +80,8 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has // External Reduce if let Node::Reduce { control, - init, - reduct, + init: _, + reduct: _, } = data { if !natural_loop.control[control.idx()] { @@ -114,7 +101,7 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has let phis: Vec<_> = editor .node_ids() .filter(|node| { - let Node::Phi { control, ref data } = editor.func().nodes[node.idx()] else { + let Node::Phi { control, data: _ } = editor.func().nodes[node.idx()] else { return false; }; natural_loop.control[control.idx()] @@ -139,7 +126,11 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has }) .collect(); - all_users.intersection(&all_uses).chain(phis.iter()).cloned().collect() + all_users + .intersection(&all_uses) + .chain(phis.iter()) + .cloned() + .collect() } /** returns PHIs that are on any regions inside the loop. */ @@ -210,7 +201,7 @@ pub fn compute_loop_variance(editor: &FunctionEditor, l: &Loop) -> LoopVarianceI // Two conditions cause something to be loop variant: for node_use in get_uses(&function.nodes[node.idx()]).as_ref() { // 1) The use is a PHI *controlled* by the loop - if let Some((control, data)) = function.nodes[node_use.idx()].try_phi() { + if let Some((control, _)) = function.nodes[node_use.idx()].try_phi() { if *all_loop_nodes.get(control.idx()).unwrap() { node_variance = LoopVariance::Variant; break; @@ -321,7 +312,7 @@ pub fn get_loop_exit_conditions( pub fn has_const_fields(editor: &FunctionEditor, ivar: InductionVariable) -> bool { match ivar { InductionVariable::Basic { - node, + node: _, initializer, update, final_value, @@ -333,7 +324,7 @@ pub fn has_const_fields(editor: &FunctionEditor, ivar: InductionVariable) -> boo .iter() .any(|node| !editor.node(node).is_constant()) } - InductionVariable::SCEV(node_id) => false, + InductionVariable::SCEV(_) => false, } } @@ -341,12 +332,12 @@ pub fn has_const_fields(editor: &FunctionEditor, ivar: InductionVariable) -> boo // IVs need to be bounded... pub fn has_canonical_iv<'a>( editor: &FunctionEditor, - l: &Loop, + _l: &Loop, ivs: &'a [InductionVariable], ) -> Option<&'a InductionVariable> { ivs.iter().find(|iv| match iv { InductionVariable::Basic { - node, + node: _, initializer, update, final_value, @@ -367,7 +358,7 @@ pub fn has_canonical_iv<'a>( }) .is_some()) } - InductionVariable::SCEV(node_id) => false, + InductionVariable::SCEV(_) => false, }) } @@ -375,7 +366,7 @@ pub fn has_canonical_iv<'a>( pub fn compute_induction_vars( function: &Function, l: &Loop, - loop_variance: &LoopVarianceInfo, + _loop_variance: &LoopVarianceInfo, ) -> Vec<InductionVariable> { // 1) Gather PHIs contained in the loop. // FIXME: (@xrouth) Should this just be PHIs controlled by the header? @@ -474,12 +465,12 @@ pub fn compute_iv_ranges( induction_vars: Vec<InductionVariable>, loop_condition: &LoopExit, ) -> Vec<InductionVariable> { - let (if_node, condition_node) = match loop_condition { + let condition_node = match loop_condition { LoopExit::Conditional { - if_node, + if_node: _, condition_node, - } => (if_node, condition_node), - LoopExit::Unconditional(node_id) => todo!(), + } => condition_node, + LoopExit::Unconditional(_) => todo!(), }; // Find IVs used by the loop condition, not across loop iterations. @@ -487,7 +478,7 @@ pub fn compute_iv_ranges( let stop_on: HashSet<_> = editor .node_ids() .filter(|node_id| { - if let Node::Phi { control, data } = editor.node(node_id) { + if let Node::Phi { control, data: _ } = editor.node(node_id) { *control == l.header } else { false @@ -513,20 +504,23 @@ pub fn compute_iv_ranges( // FIXME: DO linear algerbra to solve for loop bounds with multiple variables involved. let final_value = match &editor.func().nodes[condition_node.idx()] { - Node::Phi { control, data } => None, + Node::Phi { + control: _, + data: _, + } => None, Node::Reduce { - control, - init, - reduct, + control: _, + init: _, + reduct: _, } => None, - Node::Parameter { index } => None, - Node::Constant { id } => None, - Node::Unary { input, op } => None, + Node::Parameter { index: _ } => None, + Node::Constant { id: _ } => None, + Node::Unary { input: _, op: _ } => None, Node::Ternary { - first, - second, - third, - op, + first: _, + second: _, + third: _, + op: _, } => None, Node::Binary { left, right, op } => { match op { @@ -543,7 +537,7 @@ pub fn compute_iv_ranges( else if let Node::Binary { left: inner_left, right: inner_right, - op: inner_op, + op: _, } = editor.node(left) { let pattern = [(inner_left, inner_right), (inner_right, inner_left)] @@ -556,12 +550,12 @@ pub fn compute_iv_ranges( // FIXME: pattern_constant can be anything >= loop_update expression, let update = match iv { InductionVariable::Basic { - node, - initializer, + node: _, + initializer: _, update, - final_value, + final_value: _, } => update, - InductionVariable::SCEV(node_id) => todo!(), + InductionVariable::SCEV(_) => todo!(), }; if *pattern_constant == update { Some(*right) @@ -600,7 +594,7 @@ pub fn compute_iv_ranges( update: *update, final_value, }, - InductionVariable::SCEV(node_id) => todo!(), + InductionVariable::SCEV(_) => todo!(), }; // Propagate bounds to other IVs. diff --git a/hercules_opt/src/lib.rs b/hercules_opt/src/lib.rs index 01ae1c99ad3613e826801afebdb0e15376ae1377..e3cca1612354fee6b5544264b97a37f1352bbafd 100644 --- a/hercules_opt/src/lib.rs +++ b/hercules_opt/src/lib.rs @@ -4,7 +4,6 @@ pub mod ccp; pub mod crc; pub mod dce; pub mod delete_uncalled; -pub mod device_placement; pub mod editor; pub mod float_collections; pub mod fork_concat_split; @@ -30,7 +29,6 @@ pub use crate::ccp::*; pub use crate::crc::*; pub use crate::dce::*; pub use crate::delete_uncalled::*; -pub use crate::device_placement::*; pub use crate::editor::*; pub use crate::float_collections::*; pub use crate::fork_concat_split::*; diff --git a/hercules_opt/src/outline.rs b/hercules_opt/src/outline.rs index e59c815da12b505cadc807c4d87e6a2ef913d3fa..8fe978c5c9554fa7d0fd42f480ff724dcdc9cb36 100644 --- a/hercules_opt/src/outline.rs +++ b/hercules_opt/src/outline.rs @@ -4,7 +4,6 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use hercules_ir::def_use::*; use hercules_ir::dom::*; -use hercules_ir::fork_join_analysis::*; use hercules_ir::ir::*; use hercules_ir::subgraph::*; diff --git a/hercules_opt/src/unforkify.rs b/hercules_opt/src/unforkify.rs index 0efd0b855969dba4b0c8e2d0dfdc9ab2220f6f50..85ffd233dad79ca3339525cdf4542493d8c20124 100644 --- a/hercules_opt/src/unforkify.rs +++ b/hercules_opt/src/unforkify.rs @@ -11,7 +11,6 @@ pub fn calculate_fork_nodes( editor: &FunctionEditor, inner_control: &NodeVec, fork: NodeID, - join: NodeID, ) -> HashSet<NodeID> { // Stop on PHIs / reduces outside of loop. let stop_on: HashSet<NodeID> = editor @@ -20,7 +19,7 @@ pub fn calculate_fork_nodes( let data = &editor.func().nodes[node.idx()]; // External Phi - if let Node::Phi { control, data } = data { + if let Node::Phi { control, data: _ } = data { if match inner_control.get(control.idx()) { Some(v) => !*v, // None => true, // Doesn't exist, must be external @@ -31,8 +30,8 @@ pub fn calculate_fork_nodes( // External Reduce if let Node::Reduce { control, - init, - reduct, + init: _, + reduct: _, } = data { if match inner_control.get(control.idx()) { @@ -127,7 +126,7 @@ pub fn unforkify( let fork = &l.0; let join = &fork_join_map[&fork]; - let fork_nodes = calculate_fork_nodes(editor, l.1, *fork, *join); + let fork_nodes = calculate_fork_nodes(editor, l.1, *fork); let nodes = &editor.func().nodes; let (fork_control, factors) = nodes[fork.idx()].try_fork().unwrap(); @@ -274,7 +273,7 @@ pub fn unforkify( zip(reduces.iter(), phi_ids).zip(phis).zip(join_phi_ids) { edit.sub_edit(*reduce, phi_id); - let Node::Phi { control, data } = phi else { + let Node::Phi { control: _, data } = phi else { panic!() }; edit = edit.replace_all_uses_where(*reduce, join_phi_id, |usee| { diff --git a/hercules_opt/src/utils.rs b/hercules_opt/src/utils.rs index cc7abc7fb18a2ce056b54b28ecd1db9f5db0589f..7ad48c1c09cc8542d1b521e3d8e12fe271ef1d39 100644 --- a/hercules_opt/src/utils.rs +++ b/hercules_opt/src/utils.rs @@ -384,13 +384,12 @@ pub type DenseNodeMap<T> = Vec<T>; pub type SparseNodeMap<T> = HashMap<NodeID, T>; nest! { -// Is this something editor should give... Or is it just for analyses. // #[derive(Clone, Debug)] pub struct NodeIterator<'a> { pub direction: #[derive(Clone, Debug, PartialEq)] - enum Direction { + pub enum Direction { Uses, Users, }, diff --git a/hercules_test/hercules_interpreter/src/interpreter.rs b/hercules_test/hercules_interpreter/src/interpreter.rs index 730f6216a225700bfa4b450f4cb8905904e404a4..a78330e4f08075be053593b41dba0f412687f5f1 100644 --- a/hercules_test/hercules_interpreter/src/interpreter.rs +++ b/hercules_test/hercules_interpreter/src/interpreter.rs @@ -253,8 +253,6 @@ impl<'a> FunctionExecutionState<'a> { } let thread_values = self.get_thread_factors(&token, join); - // println!("join for: {:?}", token); - // dbg!(thread_values.clone()); // This and_modify doesn't do aynthing?? self.join_counters .entry((thread_values.clone(), join)) @@ -365,8 +363,6 @@ impl<'a> FunctionExecutionState<'a> { } pub fn handle_data(&mut self, token: &ControlToken, node: NodeID) -> InterpreterVal { - // println!("Data Node: {} {:?}", node.idx(), &self.get_function().nodes[node.idx()]); - // Partial borrow complaint. :/ match &self.module.functions[self.function_id.idx()].nodes[node.idx()] { Node::Phi { @@ -386,14 +382,6 @@ impl<'a> FunctionExecutionState<'a> { .expect("PANIC: No nesting information for thread index!") .clone(); - let num_dims_this_level = (self.get_function().nodes - [nested_forks.first().unwrap().idx()] - .try_fork() - .unwrap() - .1 - .len()); - // println!("num forks this level:{:?} ", num_forks_this_level); - // Skip forks until we get to this level. // How many forks are outer? idfk. let outer_forks: Vec<NodeID> = nested_forks @@ -402,8 +390,6 @@ impl<'a> FunctionExecutionState<'a> { .take_while(|fork| *fork != node) .collect(); - // println!("otuer_forkes: {:?}", outer_forks); - let fork_levels: usize = outer_forks .iter() .skip(1) @@ -416,9 +402,7 @@ impl<'a> FunctionExecutionState<'a> { }) .sum(); - // println!("nested forks:{:?} ", nested_forks); - // println!("fork levels: {:?}", fork_levels); - // dimension might need to instead be dimensions - dimension + // Dimension might need to instead be dimensions - dimension let v = token.thread_indicies[fork_levels + dimension]; // Might have to -1? if VERBOSE { println!( @@ -432,12 +416,11 @@ impl<'a> FunctionExecutionState<'a> { // This probably isn't the exact condition, but somethign similar. Anyways, we achieve correctness by iterating control nodes recursively. Node::Reduce { control, - init, + init: _, reduct: _, } => { let thread_values = self.get_thread_factors(token, *control); - // println!("reduction read: {:?}, {:?}", thread_values, node); let entry = self.reduce_values.entry((thread_values.clone(), node)); let val = match entry { @@ -447,7 +430,6 @@ impl<'a> FunctionExecutionState<'a> { token, node, thread_values ), }; - // println!("value: {:?}", val.clone()); val } Node::Parameter { index } => self.args[*index].clone(), @@ -502,12 +484,11 @@ impl<'a> FunctionExecutionState<'a> { } } Node::Call { + control: _, function, dynamic_constants, args, - control, } => { - // todo!("call currently dissabled lol"); let args = args .into_iter() .map(|arg_node| self.handle_data(token, *arg_node)) @@ -536,7 +517,7 @@ impl<'a> FunctionExecutionState<'a> { } Node::Read { collect, indices } => { let collection = self.handle_data(token, *collect); - if let InterpreterVal::Undef(v) = collection { + if let InterpreterVal::Undef(_) = collection { collection } else { let result = self.handle_read(token, collection.clone(), indices); @@ -556,7 +537,7 @@ impl<'a> FunctionExecutionState<'a> { indices, } => { let collection = self.handle_data(token, *collect); - if let InterpreterVal::Undef(v) = collection { + if let InterpreterVal::Undef(_) = collection { collection } else { let data = self.handle_data(token, *data); @@ -610,7 +591,6 @@ impl<'a> FunctionExecutionState<'a> { }) .collect(); let idx = InterpreterVal::array_idx(&extents, &array_indices); - //println!("idx: {:?}", idx); if idx >= vals.len() { InterpreterVal::Undef(type_id) } else { @@ -702,12 +682,6 @@ impl<'a> FunctionExecutionState<'a> { .pop() .expect("PANIC: Interpreter ran out of control tokens without returning."); - // println!( - // "\n\nNew Token at: Control State: {} threads: {:?}, {:?}", - // ctrl_token.curr.idx(), - // ctrl_token.thread_indicies.clone(), - // &self.get_function().nodes[ctrl_token.curr.idx()] - // ); // TODO: (@xrouth): Enable this + PHI latch logging wi/ a simple debug flag. // Tracking PHI vals and control state is very useful for debugging. @@ -747,7 +721,7 @@ impl<'a> FunctionExecutionState<'a> { // Convert condition to usize let cond: usize = match cond { InterpreterVal::Boolean(v) => v.into(), - InterpreterVal::Undef(v) => panic!("PANIC: Undef reached IF"), + InterpreterVal::Undef(_) => panic!("PANIC: Undef reached IF"), _ => panic!("PANIC: Invalid condition for IF, please typecheck."), }; @@ -820,7 +794,7 @@ impl<'a> FunctionExecutionState<'a> { let mut temp = i; let mut new_token = ctrl_token.clone(); // Copy map, curr, prev, etc. - for (j, dim) in factors.clone().enumerate().rev() { + for (_, dim) in factors.clone().enumerate().rev() { new_token.thread_indicies.insert(num_outer_dims, temp % dim); // Stack of thread indicies temp /= dim; } @@ -854,7 +828,6 @@ impl<'a> FunctionExecutionState<'a> { self.initialize_reduction(&ctrl_token, reduction); } - // println!("tokens_to_add: {:?}", tokens_to_add); if VERBOSE { println!( "tf, fork, join, n_tokens: {:?}, {:?}, {:?}, {:?}", @@ -878,7 +851,6 @@ impl<'a> FunctionExecutionState<'a> { } Node::Return { control: _, data } => { let result = self.handle_data(&ctrl_token, *data); - // println!("result = {:?}", result); break 'outer result; } _ => { diff --git a/hercules_test/hercules_interpreter/src/lib.rs b/hercules_test/hercules_interpreter/src/lib.rs index 3f12618c3ecc13929a26c507342887d75d2234f5..66f8c4eac35baa0845464541c1eece8335c53430 100644 --- a/hercules_test/hercules_interpreter/src/lib.rs +++ b/hercules_test/hercules_interpreter/src/lib.rs @@ -8,7 +8,6 @@ use hercules_ir::Module; use hercules_ir::TypeID; use hercules_ir::ID; -use juno_scheduler::run_schedule_on_hercules; pub use juno_scheduler::PassManager; pub use crate::interpreter::*; @@ -37,10 +36,8 @@ pub fn into_interp_val( InterpreterWrapper::Array(array) => { let ty = &module.types[target_ty_id.idx()]; - let ele_type = ty - .try_element_type() + ty.try_element_type() .expect("PANIC: Invalid parameter type"); - // unwrap -> map to rust type, check let mut values = vec![]; @@ -53,13 +50,6 @@ pub fn into_interp_val( } } -pub fn array_from_interp_val<T: Clone>(module: &Module, interp_val: InterpreterVal) -> Vec<T> -where - value::InterpreterVal: Into<T>, -{ - vec![] -} - // Recursively turns rt args into interpreter wrappers. #[macro_export] macro_rules! parse_rt_args { diff --git a/hercules_test/hercules_interpreter/src/value.rs b/hercules_test/hercules_interpreter/src/value.rs index c84b48492837df0c356d9c76ec6fcc6f2f21f126..53911e05c2333a0e9b30c5bfdacb854f8409f692 100644 --- a/hercules_test/hercules_interpreter/src/value.rs +++ b/hercules_test/hercules_interpreter/src/value.rs @@ -821,7 +821,7 @@ impl<'a> InterpreterVal { (UnaryOperator::Neg, Self::Float64(val)) => Self::Float64(-val), (UnaryOperator::Cast(type_id), val) => { // FIXME: This probably doesn't work. - let val = val.as_i128(); + let val = val.as_i128(); match types[type_id.idx()] { Type::Control => todo!(), Type::Boolean => todo!(), diff --git a/hercules_test/hercules_tests/tests/fork_transform_tests.rs b/hercules_test/hercules_tests/tests/fork_transform_tests.rs index 432fdda029e0b1fec52cd20857430df9ddd5387d..3799ca0ac7e8abe9907603269692fbd438c4e33d 100644 --- a/hercules_test/hercules_tests/tests/fork_transform_tests.rs +++ b/hercules_test/hercules_tests/tests/fork_transform_tests.rs @@ -18,7 +18,7 @@ fn fission_simple1() { println!("result: {:?}", result_1); let sched = Some(default_schedule![ - Verify, //Xdot, + Verify, //Xdot, Unforkify, //Xdot, DCE, Verify, ]); diff --git a/hercules_test/hercules_tests/tests/forkify_tests.rs b/hercules_test/hercules_tests/tests/forkify_tests.rs index 5a8bff1a5d7c2021e27548e0464e52cc657da13b..8ba8e1354ec8f1c080e2d8b556fe427493af75ae 100644 --- a/hercules_test/hercules_tests/tests/forkify_tests.rs +++ b/hercules_test/hercules_tests/tests/forkify_tests.rs @@ -18,15 +18,13 @@ fn inner_fork_chain() { let params = 2; // TODO: (@xrouth) fix macro to take no params as an option. // let result_1 = interp_module!(module, 0, dyn_consts, 2); - // println!("result: {:?}", result_1); - let sched: Option<ScheduleStmt> = Some(default_schedule![Verify, Forkify, PhiElim, Verify,]); let module = run_schedule_on_hercules(module, sched).unwrap(); let result_2 = interp_module!(module, 0, dyn_consts, 2); println!("result: {:?}", result_2); - // assert_eq!(result_1, result_2) + //assert_eq!(result_1, result_2) } #[test] diff --git a/hercules_test/hercules_tests/tests/loop_tests.rs b/hercules_test/hercules_tests/tests/loop_tests.rs index 55da702d623f095af81fde761f0ed3014958312e..5832a161a18f18ea43860c1c5d6364385d0f187f 100644 --- a/hercules_test/hercules_tests/tests/loop_tests.rs +++ b/hercules_test/hercules_tests/tests/loop_tests.rs @@ -36,9 +36,7 @@ fn alternate_bounds_use_after_loop_no_tid() { println!("result: {:?}", result_1); let schedule = default_schedule![ - ////Xdot,, Forkify, - //Xdot, ]; let module = run_schedule_on_hercules(module, Some(schedule)).unwrap(); @@ -64,15 +62,12 @@ fn alternate_bounds_use_after_loop() { println!("result: {:?}", result_1); let schedule = Some(default_schedule![ - ////Xdot,, Forkify, - //Xdot, ]); let module = run_schedule_on_hercules(module, schedule).unwrap(); let result_2 = interp_module!(module, 0, dyn_consts, a.clone()); - //println!("{:?}", result_1); println!("{:?}", result_2); assert_eq!(result_1, result_2); @@ -91,14 +86,11 @@ fn alternate_bounds_use_after_loop2() { println!("result: {:?}", result_1); - let schedule = Some(default_schedule![ - ////Xdot,, - ]); + let schedule = Some(default_schedule![]); let module = run_schedule_on_hercules(module, schedule).unwrap(); let result_2 = interp_module!(module, 0, dyn_consts, a.clone()); - //println!("{:?}", result_1); println!("{:?}", result_2); assert_eq!(result_1, result_2); @@ -117,16 +109,13 @@ fn do_while_separate_body() { println!("result: {:?}", result_1); let schedule = Some(default_schedule![ - ////Xdot,, - PhiElim, ////Xdot,, + PhiElim, Forkify, - //Xdot, ]); let module = run_schedule_on_hercules(module, schedule).unwrap(); let result_2 = interp_module!(module, 0, dyn_consts, 2i32); - //println!("{:?}", result_1); println!("{:?}", result_2); assert_eq!(result_1, result_2); @@ -143,10 +132,8 @@ fn alternate_bounds_internal_control() { println!("result: {:?}", result_1); let schedule = Some(default_schedule![ - ////Xdot,, - PhiElim, ////Xdot,, + PhiElim, Forkify, - //Xdot, ]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -169,10 +156,8 @@ fn alternate_bounds_internal_control2() { println!("result: {:?}", result_1); let schedule = Some(default_schedule![ - ////Xdot,, - PhiElim, ////Xdot,, + PhiElim, Forkify, - //Xdot, ]); let module = run_schedule_on_hercules(module, schedule).unwrap(); @@ -331,7 +316,6 @@ fn implicit_clone_pipeline() { println!("result: {:?}", result_1); let schedule = default_schedule![ - ////Xdot,, Forkify, ForkGuardElim, Forkify, @@ -383,7 +367,6 @@ fn look_at_local() { ); let schedule = Some(default_schedule![ - ////Xdot,, ]); let result_1 = interp_module!(module, 0, dyn_consts, a.clone(), b.clone()); @@ -391,9 +374,7 @@ fn look_at_local() { let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); let schedule = Some(default_schedule![ - ////Xdot,, Unforkify, Verify, - ////Xdot,, ]); let module = run_schedule_on_hercules(module.clone(), schedule).unwrap(); @@ -452,19 +433,4 @@ fn matmul_pipeline() { println!("result: {:?}", result_2); assert_eq!(result_1, result_2); - - // Verify, - // GVN, - // DCE, - // AutoOutline, - // InterproceduralSROA, - // SROA, - // InferSchedules, - // DCE, - // GCM, - // DCE, - // PhiElim, - // FloatCollections, - // GCM, - // //Xdot, } diff --git a/juno_samples/matmul/src/main.rs b/juno_samples/matmul/src/main.rs index fa5d1f04d48cdf48cf377e8f3d08de80d30e688e..624ee5652a78d9c2ab7bc84d3974bf2df5b02838 100644 --- a/juno_samples/matmul/src/main.rs +++ b/juno_samples/matmul/src/main.rs @@ -24,10 +24,14 @@ fn main() { let a = HerculesCPURef::from_slice(&a); let b = HerculesCPURef::from_slice(&b); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(c.as_slice::<i32>(), &*correct_c); let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + let tiled_c = r + .run(I as u64, J as u64, K as u64, a.clone(), b.clone()) + .await; assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c); }); } @@ -36,4 +40,3 @@ fn main() { fn matmul_test() { main(); } - diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 76e81ee9f0147caed9db3714515bd0362bbb3ae4..9888f3d2f2bd052b818049f3c225c614db54dbe7 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -1306,10 +1306,6 @@ fn run_pass( } Pass::ForkSplit => { assert!(args.is_empty()); - // FIXME: I'm not sure if this is the correct way to build fixpoint into the PM, - // i.e cloning selection. Does something need to be done to propagate labels between iterations - // of this loop? - loop { let mut inner_changed = false; pm.make_fork_join_maps(); @@ -1354,8 +1350,10 @@ fn run_pass( let Some(mut func) = func else { continue; }; + // TODO: uses direct return from forkify for now instead of + // func.modified, see comment on top of `forkify` for why. Fix + // this eventually. changed |= forkify(&mut func, control_subgraph, fork_join_map, loop_nest); - // func.modified(); } pm.delete_gravestones(); pm.clear_analyses(); @@ -1761,12 +1759,14 @@ fn run_pass( pm.make_reverse_postorders(); if force_analyses { + pm.make_typing(); pm.make_doms(); pm.make_fork_join_maps(); pm.make_devices(); } let reverse_postorders = pm.reverse_postorders.take().unwrap(); + let typing = pm.typing.take(); let doms = pm.doms.take(); let fork_join_maps = pm.fork_join_maps.take(); let devices = pm.devices.take(); @@ -1775,6 +1775,7 @@ fn run_pass( xdot_module( module, &reverse_postorders, + typing.as_ref(), doms.as_ref(), fork_join_maps.as_ref(), devices.as_ref(),