Skip to content
Snippets Groups Projects

Optimization for miranda

Merged rarbore2 requested to merge miranda_opt into main
Files
2
+ 67
10
@@ -152,6 +152,7 @@ pub fn gcm(
let backing_allocation = object_allocation(
editor,
typing,
fork_join_nest,
&node_colors,
&alignments,
&liveness,
@@ -1053,19 +1054,22 @@ fn add_extra_collection_dims(
devices: &Vec<Device>,
bbs: &BasicBlocks,
) -> bool {
if devices[editor.func_id().idx()] == Device::AsyncRust
&& editor.func().name == "_1_laplacian_estimate"
{
if devices[editor.func_id().idx()] == Device::AsyncRust {
// Look for collection constant nodes inside fork-joins that are mutated
// inside the fork-join, aren't involved in any of the reduces of the
// fork-join, and have a user that isn't a direct read based on all of
// the thread IDs.
let nodes = &editor.func().nodes;
let fco = &objects[&editor.func_id()];
for id in editor.node_ids().filter(|id| {
nodes[id.idx()].is_constant() && !editor.get_type(typing[id.idx()]).is_primitive()
}) {
let candidates: Vec<_> = editor
.node_ids()
.filter(|id| {
editor.func().nodes[id.idx()].is_constant()
&& !editor.get_type(typing[id.idx()]).is_primitive()
})
.collect();
for id in candidates {
// Check all of the above conditions.
let nodes = &editor.func().nodes;
if editor.get_users(id).len() != 1 {
continue;
}
@@ -1115,7 +1119,43 @@ fn add_extra_collection_dims(
// We know that this collection needs to be replicated across the
// fork-join dimensions, so do that.
todo!()
let ty = typing[id.idx()];
let num_dims: Vec<_> = forks
.into_iter()
.rev()
.map(|id| nodes[id.idx()].try_fork().unwrap().1.len())
.collect();
let factors = forks
.into_iter()
.rev()
.flat_map(|id| nodes[id.idx()].try_fork().unwrap().1.into_iter())
.map(|dc| *dc)
.collect();
let array_ty = Type::Array(ty, factors);
let success = editor.edit(|mut edit| {
let new_ty = edit.add_type(array_ty);
let new_cons = edit.add_zero_constant(new_ty);
let new_cons = edit.add_node(Node::Constant { id: new_cons });
let mut tids = vec![];
for (fork, num_dims) in forks.into_iter().rev().zip(num_dims) {
for dim in 0..num_dims {
tids.push(edit.add_node(Node::ThreadID {
control: *fork,
dimension: dim,
}));
}
}
let read = edit.add_node(Node::Read {
collect: new_cons,
indices: Box::new([Index::Position(tids.into_boxed_slice())]),
});
edit.sub_edit(id, new_cons);
edit = edit.replace_all_uses(id, read)?;
edit = edit.delete_node(id)?;
Ok(edit)
});
assert!(success);
return true;
}
}
false
@@ -1601,6 +1641,7 @@ fn type_size(edit: &mut FunctionEdit, ty_id: TypeID, alignments: &Vec<usize>) ->
fn object_allocation(
editor: &mut FunctionEditor,
typing: &Vec<TypeID>,
fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
node_colors: &FunctionNodeColors,
alignments: &Vec<usize>,
_liveness: &Liveness,
@@ -1626,7 +1667,7 @@ fn object_allocation(
}
}
Node::Call {
control: _,
control,
function: callee,
ref dynamic_constants,
args: _,
@@ -1656,9 +1697,25 @@ fn object_allocation(
callee_backing_size,
&mut edit,
);
// Multiply the backing allocation size of the
// callee by the number of parallel threads that
// will call the function.
let forks = &fork_join_nest[&control];
let factors: Vec<_> = forks
.into_iter()
.rev()
.flat_map(|id| edit.get_node(*id).try_fork().unwrap().1.into_iter())
.map(|dc| *dc)
.collect();
let mut multiplied_callee_backing_size = callee_backing_size;
for factor in factors {
multiplied_callee_backing_size = edit.add_dynamic_constant(
DynamicConstant::mul(multiplied_callee_backing_size, factor),
);
}
*total = edit.add_dynamic_constant(DynamicConstant::add(
*total,
callee_backing_size,
multiplied_callee_backing_size,
));
}
}
Loading