Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Commits on Source (2)
......@@ -115,15 +115,14 @@ pub fn forkify_loop(
return false;
};
// FIXME: Make sure IV is not used outside the loop.
// Get bound
let bound = match canonical_iv {
InductionVariable::Basic {
node: _,
initializer: _,
update: _,
final_value,
update_expression,
update_value,
} => final_value
.map(|final_value| get_node_as_dc(editor, final_value))
.and_then(|r| r.ok()),
......
......@@ -43,7 +43,8 @@ nest! {
pub Basic {
node: NodeID,
initializer: NodeID,
update: NodeID,
update_expression: NodeID,
update_value: NodeID,
final_value: Option<NodeID>,
},
SCEV(NodeID), // TODO @(xrouth)
......@@ -56,7 +57,8 @@ impl InductionVariable {
InductionVariable::Basic {
node,
initializer: _,
update: _,
update_expression: _,
update_value: _,
final_value: _,
} => *node,
InductionVariable::SCEV(_) => todo!(),
......@@ -75,12 +77,17 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has
// External Phi
if let Node::Phi { control, data: _ } = data {
match natural_loop.control.get(control.idx()) {
Some(v) => if !*v {
return true;
},
Some(v) => {
if !*v {
return true;
}
}
None => {
panic!("unexpceted index: {:?} for loop {:?}", control, natural_loop.header);
},
panic!(
"unexpceted index: {:?} for loop {:?}",
control, natural_loop.header
);
}
}
}
// External Reduce
......@@ -91,24 +98,34 @@ pub fn calculate_loop_nodes(editor: &FunctionEditor, natural_loop: &Loop) -> Has
} = data
{
match natural_loop.control.get(control.idx()) {
Some(v) => if !*v {
return true;
},
Some(v) => {
if !*v {
return true;
}
}
None => {
panic!("unexpceted index: {:?} for loop {:?}", control, natural_loop.header);
},
panic!(
"unexpceted index: {:?} for loop {:?}",
control, natural_loop.header
);
}
}
}
// External Control
if data.is_control() {
match natural_loop.control.get(node.idx()) {
Some(v) => if !*v {
return true;
},
Some(v) => {
if !*v {
return true;
}
}
None => {
panic!("unexpceted index: {:?} for loop {:?}", node, natural_loop.header);
},
panic!(
"unexpceted index: {:?} for loop {:?}",
node, natural_loop.header
);
}
}
}
......@@ -332,13 +349,14 @@ pub fn has_const_fields(editor: &FunctionEditor, ivar: InductionVariable) -> boo
InductionVariable::Basic {
node: _,
initializer,
update,
final_value,
update_expression,
update_value,
} => {
if final_value.is_none() {
return false;
}
[initializer, update]
[initializer, update_value]
.iter()
.any(|node| !editor.node(node).is_constant())
}
......@@ -357,8 +375,9 @@ pub fn has_canonical_iv<'a>(
InductionVariable::Basic {
node: _,
initializer,
update,
final_value,
update_expression,
update_value,
} => {
(editor
.node(initializer)
......@@ -366,9 +385,11 @@ pub fn has_canonical_iv<'a>(
|| editor
.node(initializer)
.is_zero_dc(&editor.get_dynamic_constants()))
&& (editor.node(update).is_one_constant(&editor.get_constants())
&& (editor
.node(update_value)
.is_one_constant(&editor.get_constants())
|| editor
.node(update)
.node(update_value)
.is_one_dc(&editor.get_dynamic_constants()))
&& (final_value
.map(|val| {
......@@ -458,8 +479,9 @@ pub fn compute_induction_vars(
return Some(InductionVariable::Basic {
node: phi_id,
initializer: initializer_id,
update: b,
final_value: None,
update_expression: *data_id,
update_value: b,
});
} else {
None
......@@ -476,13 +498,12 @@ pub fn compute_induction_vars(
induction_variables
}
// Find loop iterations
pub fn compute_iv_ranges(
pub fn get_loop_condition_ivs(
editor: &FunctionEditor,
l: &Loop,
induction_vars: Vec<InductionVariable>,
induction_vars: &Vec<InductionVariable>,
loop_condition: &LoopExit,
) -> Vec<InductionVariable> {
) -> HashSet<NodeID> {
let condition_node = match loop_condition {
LoopExit::Conditional {
if_node: _,
......@@ -507,11 +528,39 @@ pub fn compute_iv_ranges(
// Bound IVs used in loop bound.
let loop_bound_uses: HashSet<_> =
walk_all_uses_stop_on(*condition_node, editor, stop_on).collect();
HashSet::from_iter(induction_vars.iter().filter_map(|iv| {
if loop_bound_uses.contains(&iv.phi()) {
Some(iv.phi())
} else {
None
}
}))
}
// Find loop iterations
pub fn compute_iv_ranges(
editor: &FunctionEditor,
l: &Loop,
induction_vars: Vec<InductionVariable>,
loop_condition: &LoopExit,
) -> Vec<InductionVariable> {
let condition_node = match loop_condition.clone() {
LoopExit::Conditional {
if_node: _,
condition_node,
} => condition_node,
LoopExit::Unconditional(_) => todo!(),
};
let loop_bound_iv_phis = get_loop_condition_ivs(editor, l, &induction_vars, loop_condition);
let (loop_bound_ivs, other_ivs): (Vec<InductionVariable>, Vec<InductionVariable>) =
induction_vars
.into_iter()
.partition(|f| loop_bound_uses.contains(&f.phi()));
.partition(|f| loop_bound_iv_phis.contains(&f.phi()));
// Assume there is only one loop bound iv.
let Some(iv) = loop_bound_ivs.first() else {
return other_ivs;
};
......@@ -520,6 +569,8 @@ pub fn compute_iv_ranges(
return loop_bound_ivs.into_iter().chain(other_ivs).collect();
}
// Bound IVs used in the loop condition.
// FIXME: Do linear algebra to solve for loop bounds with multiple variables involved.
let final_value = match &editor.func().nodes[condition_node.idx()] {
Node::Phi {
......@@ -570,8 +621,9 @@ pub fn compute_iv_ranges(
InductionVariable::Basic {
node: _,
initializer: _,
update,
final_value: _,
update_expression: _,
update_value: update,
} => update,
InductionVariable::SCEV(_) => todo!(),
};
......@@ -604,12 +656,14 @@ pub fn compute_iv_ranges(
InductionVariable::Basic {
node,
initializer,
update,
final_value: _,
update_expression,
update_value,
} => InductionVariable::Basic {
node: *node,
initializer: *initializer,
update: *update,
update_expression: *update_expression,
update_value: *update_value,
final_value,
},
InductionVariable::SCEV(_) => todo!(),
......
......@@ -16,6 +16,7 @@ pub mod inline;
pub mod interprocedural_sroa;
pub mod ivar;
pub mod lift_dc_math;
pub mod loop_bound_canon;
pub mod outline;
pub mod phi_elim;
pub mod pred;
......@@ -43,6 +44,7 @@ pub use crate::inline::*;
pub use crate::interprocedural_sroa::*;
pub use crate::ivar::*;
pub use crate::lift_dc_math::*;
pub use crate::loop_bound_canon::*;
pub use crate::outline::*;
pub use crate::phi_elim::*;
pub use crate::pred::*;
......
use std::collections::HashMap;
use std::collections::HashSet;
use std::iter::zip;
use std::iter::FromIterator;
use itertools::Itertools;
use nestify::nest;
use hercules_ir::*;
use crate::*;
/// Runs loop-bound canonicalization over the loops of one function.
///
/// Walks the loops yielded by `loops.bottom_up_loops()`, keeps those whose
/// header node is a region, and calls [`canonicalize_single_loop_bounds`] on
/// every such loop whose header the editor reports as mutable.
///
/// Returns `true` as soon as one call to `canonicalize_single_loop_bounds`
/// returns `true` (remaining loops are not visited), and `false` if no call
/// does. `fork_join_map` is accepted but not read by this function.
pub fn loop_bound_canon_toplevel(
    editor: &mut FunctionEditor,
    fork_join_map: &HashMap<NodeID, NodeID>,
    control_subgraph: &Subgraph,
    loops: &LoopTree,
) -> bool {
    // Materialize the candidate list first: the filter only needs a shared
    // borrow of the editor, while the loop below needs it mutably.
    let region_headed_loops: Vec<_> = loops
        .bottom_up_loops()
        .into_iter()
        .filter(|(header, _)| editor.func().nodes[header.idx()].is_region())
        .collect();

    for (header, control) in region_headed_loops {
        if !editor.is_mutable(header) {
            continue;
        }
        let natural_loop = Loop {
            header,
            control: control.clone(),
        };
        if canonicalize_single_loop_bounds(editor, control_subgraph, &natural_loop) {
            return true;
        }
    }
    false
}
/// Attempts to rewrite a single loop so that its bound induction variable
/// starts at zero.
///
/// The function bails out with `false` unless all of the following hold:
/// the loop has a conditional exit, exactly one basic induction variable
/// feeds the exit condition, that variable has a known final value, the loop
/// header has a use outside the loop's control set, the initializer, update
/// value and final value are all `Node::DynamicConstant`s, and the exit
/// condition is `update_expression < <dynamic constant>`.
///
/// When those checks pass, one edit is submitted that:
/// * feeds the IV phi a new dynamic constant `0` instead of `initializer`,
/// * feeds the exit condition `bound - init` instead of the old bound,
/// * applies the same two substitutions to a matching guard comparison in
///   front of the loop, if one was recognized,
/// * redirects users of the IV phi / update expression to
///   `<value> + initializer` nodes.
///
/// Returns `true` without editing anything if `has_canonical_iv` already
/// finds a canonical IV; otherwise returns whatever `editor.edit` returns.
///
/// # Panics
///
/// The exit-condition match panics (`todo!()` / `panic!()`) for every binary
/// operator other than `BinaryOperator::LT`.
pub fn canonicalize_single_loop_bounds(
editor: &mut FunctionEditor,
control_subgraph: &Subgraph,
l: &Loop,
) -> bool {
let function = editor.func();
// No recognizable exit condition: nothing to canonicalize.
let Some(loop_condition) = get_loop_exit_conditions(function, l, control_subgraph) else {
return false;
};
// Only conditional exits are handled. `loop_if` is bound here but not read
// again in this function.
let LoopExit::Conditional {
if_node: loop_if,
condition_node,
} = loop_condition.clone()
else {
return false;
};
// Induction variable analysis: find the IVs, then attach ranges to them.
let loop_variance = compute_loop_variance(editor, l);
let ivs = compute_induction_vars(editor.func(), l, &loop_variance);
let ivs = compute_iv_ranges(editor, l, ivs, &loop_condition);
// NOTE(review): this reports `true` although no edit was made on this path —
// confirm callers do not interpret the result as "function was modified".
if has_canonical_iv(editor, l, &ivs).is_some() {
// println!("has canon iv!");
return true;
}
// Keep only the IVs whose phi is used by the loop exit condition.
let loop_bound_iv_phis = get_loop_condition_ivs(editor, l, &ivs, &loop_condition);
let (loop_bound_ivs, _): (Vec<InductionVariable>, Vec<InductionVariable>) = ivs
.into_iter()
.partition(|f| loop_bound_iv_phis.contains(&f.phi()));
// Assume there is only one loop bound iv.
if loop_bound_ivs.len() != 1 {
// println!("has multiple iv!");
return false;
}
// The length check above guarantees `first()` is `Some`; the `else` branch is
// kept as a defensive fallback.
let Some(iv) = loop_bound_ivs.first() else {
return false;
};
// Only basic induction variables are supported (not `SCEV`).
let InductionVariable::Basic {
node: iv_phi,
initializer,
final_value,
update_expression,
update_value,
} = iv
else {
return false;
};
// A final value must have been computed for the IV.
let Some(final_value) = final_value else {
return false;
};
// First use of the loop header that is not part of the loop's own control
// set, i.e. the header's predecessor outside the loop.
let Some(loop_pred) = editor
.get_uses(l.header)
.filter(|node| !l.control[node.idx()])
.next()
else {
return false;
};
// If there is a guard, we need to edit it.
// (init_id, bound_id, binop node, if node).
// FIXME: This is quite fragile.
// A guard is recognized only when the loop predecessor is a projection of an
// `If` whose condition is a binary node with the same operator as the loop
// condition, `initializer` on the left and `final_value` on the right.
let guard_info: Option<(NodeID, NodeID, NodeID, NodeID)> = (|| {
let Node::Projection {
control,
selection: _,
} = editor.node(loop_pred)
else {
return None;
};
let Node::If { control, cond } = editor.node(control) else {
return None;
};
let Node::Binary { left, right, op } = editor.node(cond) else {
return None;
};
let Node::Binary {
left: _,
right: _,
op: loop_op,
} = editor.node(condition_node)
else {
return None;
};
// The guard must compare with the same operator as the loop condition...
if op != loop_op {
return None;
}
// ...and compare exactly the IV's initializer against its final value.
if left != initializer {
return None;
}
if right != final_value {
return None;
}
return Some((*left, *right, *cond, *control));
})();
// // If guard is none, if some, make sure it is a good guard! move on
// if let Some((init_id, bound_id, binop_node, if_node))= potential_guard_info {
// };
// let fork_guard_condition =
// Lift dc math should make all constant into DCs, so these should all be DCs.
let Node::DynamicConstant { id: init_dc_id } = *editor.node(initializer) else {
return false;
};
// `update_dc_id` is not read below; this binding only checks that the update
// value is a dynamic constant.
let Node::DynamicConstant { id: update_dc_id } = *editor.node(update_value) else {
return false;
};
// We are assuming this is a simple loop bound (i.e. only one induction variable involved).
// `loop_bound_dc_id` is likewise not read below; the binding only checks that
// the final value is a dynamic constant.
let Node::DynamicConstant {
id: loop_bound_dc_id,
} = *editor.node(final_value)
else {
return false;
};
// We need to do 4 (5) things, which are mostly separate.
// 0) Make the update into addition.
// 1) Make the update a positive value.
// 2) Transform the condition into a `<`
// 3) Adjust update to be 1 (and bounds).
// 4) Change init to start from 0.
// 5) Find some way to get fork-guard-elim to work with the new fork.
// ideally, this goes in fork-guard-elim, but for now we hack it to change the guard condition bounds
// here when we edit the loop bounds.
// Right now we are just going to do (4), because I am lazy!
// Collect info about the loop condition transformation.
// Only `update_expression < <dynamic constant>` is accepted; the right-hand
// operand becomes `dc_bound_node`. `dc_bound_node` is declared `mut` but is
// never reassigned.
// NOTE(review): every other comparison / logical operator hits `todo!()` and
// the catch-all arm hits `panic!()`, so this pass aborts on such loops rather
// than skipping them — confirm that is intended.
let mut dc_bound_node = match *editor.node(condition_node) {
Node::Binary { left, right, op } => match op {
BinaryOperator::LT => {
if left == *update_expression && editor.node(right).is_dynamic_constant() {
right
} else {
return false;
}
}
BinaryOperator::LTE => todo!(),
BinaryOperator::GT => todo!(),
BinaryOperator::GTE => todo!(),
BinaryOperator::EQ => todo!(),
BinaryOperator::NE => todo!(),
BinaryOperator::Or => todo!(),
BinaryOperator::And => todo!(),
BinaryOperator::Xor => todo!(),
_ => panic!(),
},
_ => return false,
};
// Dynamic constant id of the bound operand; used to build `bound - init`.
let Node::DynamicConstant {
id: bound_node_dc_id,
} = *editor.node(dc_bound_node)
else {
return false;
};
// If increment is negative (how in the world do we know that...)
// Increment can be DefinitelyPositive, Unknown, DefinitelyNegative.
// // First, massage loop condition to be <, because that is normal!
// Also includes
// editor.edit(|mut edit| {
// }
// Collect immediate IV users
// Users of the update expression other than the IV phi and the loop condition.
let update_expr_users: Vec<_> = editor
.get_users(*update_expression)
.filter(|node| *node != iv.phi() && *node != condition_node)
.collect();
// println!("update_expr_users: {:?}", update_expr_users);
// Users of the IV phi other than the phi itself and the update expression.
// This vector is only referenced by the commented-out debug print below.
let iv_phi_users: Vec<_> = editor
.get_users(iv.phi())
.filter(|node| *node != iv.phi() && *node != *update_expression)
.collect();
// println!(" iv_phi_users: {:?}", iv_phi_users);
let result = editor.edit(|mut edit| {
// 4) Second, change loop IV to go from 0..N.
// we subtract off init from init and dc_bound_node,
// and then we add it back to uses of the IV.
// New initializer: the dynamic constant 0, substituted only in the IV phi.
let new_init_dc = DynamicConstant::Constant(0);
let new_init = Node::DynamicConstant {
id: edit.add_dynamic_constant(new_init_dc),
};
let new_init = edit.add_node(new_init);
edit = edit.replace_all_uses_where(*initializer, new_init, |usee| *usee == iv.phi())?;
// New bound: `bound - init`, substituted only in the loop condition node.
let new_condition_id = DynamicConstant::sub(bound_node_dc_id, init_dc_id);
let new_condition = Node::DynamicConstant {
id: edit.add_dynamic_constant(new_condition_id),
};
let new_condition = edit.add_node(new_condition);
edit = edit
.replace_all_uses_where(dc_bound_node, new_condition, |usee| *usee == condition_node)?;
// Change loop guard:
// Apply the same init / bound substitution inside the guard comparison.
// `if_node` is destructured here but not used.
if let Some((init_id, bound_id, binop_node, if_node)) = guard_info {
edit = edit.replace_all_uses_where(init_id, new_init, |usee| *usee == binop_node)?;
edit =
edit.replace_all_uses_where(bound_id, new_condition, |usee| *usee == binop_node)?;
}
// Add back to uses of the IV
// NOTE(review): each node in `update_expr_users` is wrapped in
// `user + initializer` here, and the step after this loop additionally
// redirects uses of `update_expression` itself to
// `update_expression + initializer`. It looks like the offset could end up
// applied twice for the same user, and `replace_all_uses(user, new_user)` has
// no filter excluding `new_user`'s own `left` operand — confirm against the
// editor's `replace_all_uses` semantics.
for user in update_expr_users {
let new_user = Node::Binary {
left: user,
right: *initializer,
op: BinaryOperator::Add,
};
let new_user = edit.add_node(new_user);
edit = edit.replace_all_uses(user, new_user)?;
}
// Redirect uses of the update expression to `update_expression + initializer`,
// leaving the IV phi, the new add node and the loop condition untouched.
let new_user = Node::Binary {
left: *update_expression,
right: *initializer,
op: BinaryOperator::Add,
};
let new_user = edit.add_node(new_user);
edit = edit.replace_all_uses_where(*update_expression, new_user, |usee| {
*usee != iv.phi()
&& *usee != *update_expression
&& *usee != new_user
&& *usee != condition_node
})?;
// Redirect uses of the IV phi to `iv_phi + initializer`, leaving the phi
// itself, the update expression and the new add node untouched.
let new_user = Node::Binary {
left: *iv_phi,
right: *initializer,
op: BinaryOperator::Add,
};
let new_user = edit.add_node(new_user);
edit = edit.replace_all_uses_where(*iv_phi, new_user, |usee| {
*usee != iv.phi() && *usee != *update_expression && *usee != new_user
})?;
Ok(edit)
});
// Forward the outcome reported by `editor.edit`.
return result;
}
......@@ -132,10 +132,7 @@ pub fn infer_monoid_reduce(
let is_binop_monoid = |op| {
matches!(
op,
BinaryOperator::Add
| BinaryOperator::Mul
| BinaryOperator::Or
| BinaryOperator::And
BinaryOperator::Add | BinaryOperator::Mul | BinaryOperator::Or | BinaryOperator::And
)
};
let is_intrinsic_monoid = |intrinsic| matches!(intrinsic, Intrinsic::Max | Intrinsic::Min);
......@@ -147,9 +144,9 @@ pub fn infer_monoid_reduce(
init: _,
reduct,
} = func.nodes[id.idx()]
&& (matches!(func.nodes[reduct.idx()], Node::Binary { left, right, op }
if ((left == id && !reduce_cycles[&id].contains(&right)) ||
(right == id && !reduce_cycles[&id].contains(&left))) &&
&& (matches!(func.nodes[reduct.idx()], Node::Binary { left, right, op }
if ((left == id && !reduce_cycles[&id].contains(&right)) ||
(right == id && !reduce_cycles[&id].contains(&left))) &&
is_binop_monoid(op))
|| matches!(&func.nodes[reduct.idx()], Node::IntrinsicCall { intrinsic, args }
if (args.contains(&id) && is_intrinsic_monoid(*intrinsic) &&
......
......@@ -394,7 +394,7 @@ unsafe impl Sync for __RawPtrSendSync {}
*
* The data held at all of its non-None allocations and references is maintained so that it is the
* same, and so methods will attempt to use the reference or allocation that is most convenient.
*
*
* HerculesImmBox holds references to immutable memory only. All operations on these are through
* immutable references, though internally it uses OnceLocks to protect its resources since the Box
* may be used in multiple parallel threads if it is used in parallel Hercules code invocation.
......@@ -499,9 +499,9 @@ impl<'a, T> From<HerculesCUDARefMut<'a>> for HerculesImmBox<'a, T> {
}
}
impl<'a, T> HerculesImmBox<'a, T>
where
T: Default + Clone
impl<'a, T> HerculesImmBox<'a, T>
where
T: Default + Clone,
{
pub fn as_slice(&'a self) -> &'a [T] {
self.as_cpu_ref().as_slice()
......@@ -517,18 +517,23 @@ where
} else {
#[cfg(feature = "cuda")]
if let Some(cuda_ref) = self.cuda_ref.get() {
return
self.cpu_ref.get_or_init(|| {
return self
.cpu_ref
.get_or_init(|| {
let elements = unsafe { cuda_ref.__size() / size_of::<T>() };
let mut alloc = Vec::new();
alloc.resize_with(elements, Default::default);
let _ = cuda_ref.clone().to_cpu_ref(&mut alloc);
self.cpu_alloc.set(alloc).map_err(|_| ()).expect("HerculesImmBox cpu_alloc was set unexpectedly");
self.cpu_alloc
.set(alloc)
.map_err(|_| ())
.expect("HerculesImmBox cpu_alloc was set unexpectedly");
let alloc = self.cpu_alloc.get().unwrap();
HerculesCPURef::from_slice(alloc)
}).clone();
})
.clone();
}
panic!("HerculesImmBox has no reference to data")
......@@ -541,13 +546,19 @@ where
cuda_ref.clone()
} else {
if let Some(cpu_ref) = self.cpu_ref.get() {
return self.cuda_ref.get_or_init(|| {
// Copy data to CUDA device
let alloc = CUDABox::from_cpu_ref(cpu_ref.clone());
self.cuda_alloc.set(alloc).map_err(|_| ()).expect("HerculesImmBox cuda_alloc was set unexpectedly");
self.cuda_alloc.get().unwrap().get_ref()
}).clone();
return self
.cuda_ref
.get_or_init(|| {
// Copy data to CUDA device
let alloc = CUDABox::from_cpu_ref(cpu_ref.clone());
self.cuda_alloc
.set(alloc)
.map_err(|_| ())
.expect("HerculesImmBox cuda_alloc was set unexpectedly");
self.cuda_alloc.get().unwrap().get_ref()
})
.clone();
}
panic!("HerculesImmBox has no reference to data")
......@@ -651,7 +662,7 @@ impl<'a, T> From<HerculesCUDARefMut<'a>> for HerculesMutBox<'a, T> {
impl<'a, T> HerculesMutBox<'a, T>
where
T: Default + Clone
T: Default + Clone,
{
pub fn as_slice(&'a mut self) -> &'a mut [T] {
self.as_cpu_ref().as_slice()
......@@ -659,42 +670,41 @@ where
pub fn as_cpu_ref(&'a mut self) -> HerculesCPURefMut<'a> {
match self.loc {
HerculesMutBoxLocation::CPU => {
match self.cpu_alloc {
Allocation::None => panic!("No CPU reference"),
Allocation::Reference(ref mut val) => HerculesCPURefMut::from_slice(*val),
Allocation::Allocation(ref mut val) => HerculesCPURefMut::from_slice::<T>(val),
}
}
HerculesMutBoxLocation::CPU => match self.cpu_alloc {
Allocation::None => panic!("No CPU reference"),
Allocation::Reference(ref mut val) => HerculesCPURefMut::from_slice(*val),
Allocation::Allocation(ref mut val) => HerculesCPURefMut::from_slice::<T>(val),
},
#[cfg(feature = "cuda")]
HerculesMutBoxLocation::CUDA => {
let cuda_ref : HerculesCUDARef<'a> =
match self.cuda_alloc {
Allocation::None => panic!("No GPU reference"),
Allocation::Reference(ref mut val) => val.dup().as_ref(),
Allocation::Allocation(ref val) => val.get_ref(),
};
let cuda_ref: HerculesCUDARef<'a> = match self.cuda_alloc {
Allocation::None => panic!("No GPU reference"),
Allocation::Reference(ref mut val) => val.dup().as_ref(),
Allocation::Allocation(ref val) => val.get_ref(),
};
let elements = unsafe { cuda_ref.__size() / size_of::<T>() };
// Allocate host memory (if needed)
let cpu_alloc : Allocation<&'a mut [T], Vec<T>> =
match self.cpu_alloc.take() {
Allocation::Reference(val) if val.len() == elements => Allocation::Reference(val),
Allocation::Allocation(val) if val.len() == elements => Allocation::Allocation(val),
_ => {
let mut alloc = Vec::new();
alloc.resize_with(elements, Default::default);
Allocation::Allocation(alloc)
}
};
let cpu_alloc: Allocation<&'a mut [T], Vec<T>> = match self.cpu_alloc.take() {
Allocation::Reference(val) if val.len() == elements => {
Allocation::Reference(val)
}
Allocation::Allocation(val) if val.len() == elements => {
Allocation::Allocation(val)
}
_ => {
let mut alloc = Vec::new();
alloc.resize_with(elements, Default::default);
Allocation::Allocation(alloc)
}
};
self.cpu_alloc = cpu_alloc;
let cpu_ref : &'a mut [T] =
match &mut self.cpu_alloc {
Allocation::None => panic!(),
Allocation::Reference(val) => val,
Allocation::Allocation(val) => val,
};
let cpu_ref: &'a mut [T] = match &mut self.cpu_alloc {
Allocation::None => panic!(),
Allocation::Reference(val) => val,
Allocation::Allocation(val) => val,
};
// Transfer data from CUDA device
let cpu_ref = cuda_ref.to_cpu_ref(cpu_ref);
......@@ -709,31 +719,32 @@ where
pub fn as_cuda_ref(&'a mut self) -> HerculesCUDARefMut<'a> {
match self.loc {
HerculesMutBoxLocation::CPU => {
let cpu_ref : &'a [T] =
match self.cpu_alloc {
Allocation::None => panic!("No CPU reference"),
Allocation::Reference(ref val) => val,
Allocation::Allocation(ref val) => val,
};
let cpu_ref: &'a [T] = match self.cpu_alloc {
Allocation::None => panic!("No CPU reference"),
Allocation::Reference(ref val) => val,
Allocation::Allocation(ref val) => val,
};
let size = cpu_ref.len() * size_of::<T>();
let (cuda_alloc, copied) =
match self.cuda_alloc.take() {
Allocation::Reference(val) if unsafe { val.__size() == size } => (Allocation::Reference(val), false),
Allocation::Allocation(val) if val.get_bytes() == size => (Allocation::Allocation(val), false),
_ => {
let alloc = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(cpu_ref));
(Allocation::Allocation(alloc), true)
}
};
let (cuda_alloc, copied) = match self.cuda_alloc.take() {
Allocation::Reference(val) if unsafe { val.__size() == size } => {
(Allocation::Reference(val), false)
}
Allocation::Allocation(val) if val.get_bytes() == size => {
(Allocation::Allocation(val), false)
}
_ => {
let alloc = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(cpu_ref));
(Allocation::Allocation(alloc), true)
}
};
self.cuda_alloc = cuda_alloc;
let cuda_ref =
match self.cuda_alloc {
Allocation::None => panic!(),
Allocation::Reference(ref mut val) => val.dup(),
Allocation::Allocation(ref mut val) => val.get_ref_mut(),
};
let cuda_ref = match self.cuda_alloc {
Allocation::None => panic!(),
Allocation::Reference(ref mut val) => val.dup(),
Allocation::Allocation(ref mut val) => val.get_ref_mut(),
};
if !copied {
unsafe {
......@@ -744,13 +755,11 @@ where
self.loc = HerculesMutBoxLocation::CUDA;
cuda_ref
}
HerculesMutBoxLocation::CUDA => {
match self.cuda_alloc {
Allocation::None => panic!("No GPU reference"),
Allocation::Reference(ref mut val) => val.dup(),
Allocation::Allocation(ref mut val) => val.get_ref_mut(),
}
}
HerculesMutBoxLocation::CUDA => match self.cuda_alloc {
Allocation::None => panic!("No GPU reference"),
Allocation::Reference(ref mut val) => val.dup(),
Allocation::Allocation(ref mut val) => val.get_ref_mut(),
},
}
}
}
......@@ -760,7 +769,8 @@ pub trait HerculesImmBoxTo<'a, T> {
}
impl<'a, T> HerculesImmBoxTo<'a, HerculesCPURef<'a>> for HerculesImmBox<'a, T>
where T: Default + Clone
where
T: Default + Clone,
{
fn to(&'a self) -> HerculesCPURef<'a> {
self.as_cpu_ref()
......@@ -769,7 +779,8 @@ where T: Default + Clone
#[cfg(feature = "cuda")]
impl<'a, T> HerculesImmBoxTo<'a, HerculesCUDARef<'a>> for HerculesImmBox<'a, T>
where T: Default + Clone
where
T: Default + Clone,
{
fn to(&'a self) -> HerculesCUDARef<'a> {
self.as_cuda_ref()
......@@ -781,7 +792,8 @@ pub trait HerculesMutBoxTo<'a, T> {
}
impl<'a, T> HerculesMutBoxTo<'a, HerculesCPURefMut<'a>> for HerculesMutBox<'a, T>
where T: Default + Clone
where
T: Default + Clone,
{
fn to(&'a mut self) -> HerculesCPURefMut<'a> {
self.as_cpu_ref()
......@@ -790,7 +802,8 @@ where T: Default + Clone
#[cfg(feature = "cuda")]
impl<'a, T> HerculesMutBoxTo<'a, HerculesCUDARefMut<'a>> for HerculesMutBox<'a, T>
where T: Default + Clone
where
T: Default + Clone,
{
fn to(&'a mut self) -> HerculesCUDARefMut<'a> {
self.as_cuda_ref()
......
......@@ -409,10 +409,7 @@ fn matmul_pipeline() {
// };
// assert_eq!(correct_c[0], value);
let schedule = Some(default_schedule![
Xdot,
Verify,
]);
let schedule = Some(default_schedule![Xdot, Verify,]);
module = run_schedule_on_hercules(module, schedule).unwrap();
......
......@@ -45,12 +45,12 @@ fn main() {
let mut r = runner!(test6);
let output = r.run(73).await;
let correct = (73i32..73i32+1024i32).collect();
let correct = (73i32..73i32 + 1024i32).collect();
assert(&correct, output);
let mut r = runner!(test7);
let output = r.run(42).await;
let correct: i32 = (42i32..42i32+32i32).sum();
let correct: i32 = (42i32..42i32 + 32i32).sum();
assert_eq!(correct, output);
let mut r = runner!(test8);
......
......@@ -4,7 +4,11 @@ fn main() {
JunoCompiler::new()
.file_in_src("median.jn")
.unwrap()
.schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" })
.schedule_in_src(if cfg!(feature = "cuda") {
"gpu.sch"
} else {
"cpu.sch"
})
.unwrap()
.build()
.unwrap();
......
......@@ -5,18 +5,14 @@ juno_build::juno!("median");
use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo};
fn main() {
let m = vec![86, 72, 14, 5, 55,
25, 98, 89, 3, 66,
44, 81, 27, 3, 40,
18, 4, 57, 93, 34,
70, 50, 50, 18, 34];
let m = vec![
86, 72, 14, 5, 55, 25, 98, 89, 3, 66, 44, 81, 27, 3, 40, 18, 4, 57, 93, 34, 70, 50, 50, 18,
34,
];
let m = HerculesImmBox::from(m.as_slice());
let mut r = runner!(median_window);
let res =
async_std::task::block_on(async {
r.run(m.to()).await
});
let res = async_std::task::block_on(async { r.run(m.to()).await });
assert_eq!(res, 57);
}
......
......@@ -7,14 +7,18 @@ juno_build::juno!("products");
fn main() {
async_std::task::block_on(async {
let input = vec![(0, 1), (2, 3)];
let input : HerculesImmBox<(i32, i32)> = HerculesImmBox::from(input.as_slice());
let input: HerculesImmBox<(i32, i32)> = HerculesImmBox::from(input.as_slice());
let mut r = runner!(product_read);
let res : Vec<i32> = HerculesMutBox::from(r.run(input.to()).await).as_slice().to_vec();
let res: Vec<i32> = HerculesMutBox::from(r.run(input.to()).await)
.as_slice()
.to_vec();
assert_eq!(res, vec![0, 1, 2, 3]);
// Technically this returns a product of two i32s, but we can interpret that as an array
let mut r = runner!(product_return);
let res : Vec<i32> = HerculesMutBox::from(r.run(42, 17).await).as_slice().to_vec();
let res: Vec<i32> = HerculesMutBox::from(r.run(42, 17).await)
.as_slice()
.to_vec();
assert_eq!(res, vec![42, 17]);
});
}
......
......@@ -134,6 +134,7 @@ impl FromStr for Appliable {
"fork-unroll" | "unroll" => Ok(Appliable::Pass(ir::Pass::ForkUnroll)),
"fork-fusion" | "fusion" => Ok(Appliable::Pass(ir::Pass::ForkFusion)),
"lift-dc-math" => Ok(Appliable::Pass(ir::Pass::LiftDCMath)),
"loop-bound-canon" => Ok(Appliable::Pass(ir::Pass::LoopBoundCanon)),
"outline" => Ok(Appliable::Pass(ir::Pass::Outline)),
"phi-elim" => Ok(Appliable::Pass(ir::Pass::PhiElim)),
"predication" => Ok(Appliable::Pass(ir::Pass::Predication)),
......
......@@ -26,6 +26,7 @@ pub enum Pass {
Inline,
InterproceduralSROA,
LiftDCMath,
LoopBoundCanon,
Outline,
PhiElim,
Predication,
......
......@@ -2725,6 +2725,32 @@ fn run_pass(
Pass::Print => {
println!("{:?}", args.get(0));
}
Pass::LoopBoundCanon => {
assert_eq!(args.len(), 0);
pm.make_fork_join_maps();
pm.make_loops();
pm.make_control_subgraphs();
let fork_join_maps = pm.fork_join_maps.take().unwrap();
let loops = pm.loops.take().unwrap();
let control_subgraphs = pm.control_subgraphs.take().unwrap();
for (((func, fork_join_map), loops), control_subgraph) in
build_selection(pm, selection, false)
.into_iter()
.zip(fork_join_maps.iter())
.zip(loops.iter())
.zip(control_subgraphs.iter())
{
let Some(mut func) = func else {
continue;
};
loop_bound_canon_toplevel(&mut func, fork_join_map, control_subgraph, loops);
changed |= func.modified();
}
pm.delete_gravestones();
pm.clear_analyses();
}
}
println!("Ran Pass: {:?}", pass);
......