Skip to content
Snippets Groups Projects

Optimization for miranda

Merged rarbore2 requested to merge miranda_opt into main
1 file
+ 21
2
Compare changes
  • Side-by-side
  • Inline
+ 156
5
@@ -82,6 +82,7 @@ pub fn gcm(
control_subgraph: &Subgraph,
dom: &DomTree,
fork_join_map: &HashMap<NodeID, NodeID>,
fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
loops: &LoopTree,
reduce_cycles: &HashMap<NodeID, HashSet<NodeID>>,
objects: &CollectionObjects,
@@ -120,6 +121,18 @@ pub fn gcm(
return None;
}
if add_extra_collection_dims(
editor,
typing,
fork_join_map,
fork_join_nest,
objects,
devices,
&bbs,
) {
return None;
}
let Some(node_colors) = color_nodes(editor, typing, &objects, &devices, node_colors) else {
return None;
};
@@ -139,6 +152,7 @@ pub fn gcm(
let backing_allocation = object_allocation(
editor,
typing,
fork_join_nest,
&node_colors,
&alignments,
&liveness,
@@ -1027,6 +1041,126 @@ fn spill_clones(
}
}
/*
* Look for mutated collections placed inside fork-joins in AsyncRust functions.
* These collections should be duplicated across the size of the fork-join.
*/
fn add_extra_collection_dims(
editor: &mut FunctionEditor,
typing: &Vec<TypeID>,
fork_join_map: &HashMap<NodeID, NodeID>,
fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
objects: &CollectionObjects,
devices: &Vec<Device>,
bbs: &BasicBlocks,
) -> bool {
if devices[editor.func_id().idx()] == Device::AsyncRust {
// Look for collection constant nodes inside fork-joins that are mutated
// inside the fork-join, aren't involved in any of the reduces of the
// fork-join, and have a user that isn't a direct read based on all of
// the thread IDs.
let fco = &objects[&editor.func_id()];
let candidates: Vec<_> = editor
.node_ids()
.filter(|id| {
editor.func().nodes[id.idx()].is_constant()
&& !editor.get_type(typing[id.idx()]).is_primitive()
})
.collect();
for id in candidates {
// Check all of the above conditions.
let nodes = &editor.func().nodes;
if editor.get_users(id).len() != 1 {
continue;
}
let forks = &fork_join_nest[&bbs.0[id.idx()]];
if forks.is_empty() {
continue;
}
let object = fco.objects(id)[0];
let mutated_inside = fco
.mutators(object)
.into_iter()
.any(|id| &fork_join_nest[&bbs.0[id.idx()]] == forks);
if !mutated_inside {
continue;
}
let in_reduce = forks.into_iter().any(|id| {
let join = fork_join_map[id];
let mut reduces = editor
.get_users(join)
.filter(|id| nodes[id.idx()].is_reduce());
reduces.any(|id| fco.objects(id).contains(&object))
});
if in_reduce {
continue;
}
if let Node::Read {
collect: _,
ref indices,
} = nodes[editor.get_users(id).next().unwrap().idx()]
&& let Index::Position(ref pos) = indices[0]
&& {
let tid_pos: BTreeSet<(NodeID, usize)> = pos
.into_iter()
.filter_map(|id| nodes[id.idx()].try_thread_id())
.collect();
let reference: BTreeSet<(NodeID, usize)> = forks
.into_iter()
.flat_map(|id| {
(0..nodes[id.idx()].try_fork().unwrap().1.len()).map(|dim| (*id, dim))
})
.collect();
tid_pos == reference
}
{
continue;
}
// We know that this collection needs to be replicated across the
// fork-join dimensions, so do that.
let ty = typing[id.idx()];
let num_dims: Vec<_> = forks
.into_iter()
.rev()
.map(|id| nodes[id.idx()].try_fork().unwrap().1.len())
.collect();
let factors = forks
.into_iter()
.rev()
.flat_map(|id| nodes[id.idx()].try_fork().unwrap().1.into_iter())
.map(|dc| *dc)
.collect();
let array_ty = Type::Array(ty, factors);
let success = editor.edit(|mut edit| {
let new_ty = edit.add_type(array_ty);
let new_cons = edit.add_zero_constant(new_ty);
let new_cons = edit.add_node(Node::Constant { id: new_cons });
let mut tids = vec![];
for (fork, num_dims) in forks.into_iter().rev().zip(num_dims) {
for dim in 0..num_dims {
tids.push(edit.add_node(Node::ThreadID {
control: *fork,
dimension: dim,
}));
}
}
let read = edit.add_node(Node::Read {
collect: new_cons,
indices: Box::new([Index::Position(tids.into_boxed_slice())]),
});
edit.sub_edit(id, new_cons);
edit = edit.replace_all_uses(id, read)?;
edit = edit.delete_node(id)?;
Ok(edit)
});
assert!(success);
return true;
}
}
false
}
type Liveness = BTreeMap<NodeID, Vec<BTreeSet<NodeID>>>;
/*
@@ -1507,12 +1641,13 @@ fn type_size(edit: &mut FunctionEdit, ty_id: TypeID, alignments: &Vec<usize>) ->
fn object_allocation(
editor: &mut FunctionEditor,
typing: &Vec<TypeID>,
fork_join_nest: &HashMap<NodeID, Vec<NodeID>>,
node_colors: &FunctionNodeColors,
alignments: &Vec<usize>,
_liveness: &Liveness,
backing_allocations: &BackingAllocations,
) -> FunctionBackingAllocation {
let mut fba = BTreeMap::new();
let mut fba = FunctionBackingAllocation::new();
let node_ids = editor.node_ids();
editor.edit(|mut edit| {
@@ -1526,13 +1661,13 @@ fn object_allocation(
let (total, offsets) =
fba.entry(device).or_insert_with(|| (zero, BTreeMap::new()));
*total = align(&mut edit, *total, alignments[typing[id.idx()].idx()]);
offsets.insert(id, *total);
let type_size = type_size(&mut edit, typing[id.idx()], alignments);
offsets.insert(id, (*total, type_size));
*total = edit.add_dynamic_constant(DynamicConstant::add(*total, type_size));
}
}
Node::Call {
control: _,
control,
function: callee,
ref dynamic_constants,
args: _,
@@ -1554,7 +1689,6 @@ fn object_allocation(
// We don't know the alignment requirement of the memory
// in the callee, so just assume the largest alignment.
*total = align(&mut edit, *total, LARGEST_ALIGNMENT);
offsets.insert(id, *total);
// Substitute the dynamic constant parameters in the
// callee's backing size.
callee_backing_size = substitute_dynamic_constants(
@@ -1562,9 +1696,26 @@ fn object_allocation(
callee_backing_size,
&mut edit,
);
offsets.insert(id, (*total, callee_backing_size));
// Multiply the backing allocation size of the
// callee by the number of parallel threads that
// will call the function.
let forks = &fork_join_nest[&control];
let factors: Vec<_> = forks
.into_iter()
.rev()
.flat_map(|id| edit.get_node(*id).try_fork().unwrap().1.into_iter())
.map(|dc| *dc)
.collect();
let mut multiplied_callee_backing_size = callee_backing_size;
for factor in factors {
multiplied_callee_backing_size = edit.add_dynamic_constant(
DynamicConstant::mul(multiplied_callee_backing_size, factor),
);
}
*total = edit.add_dynamic_constant(DynamicConstant::add(
*total,
callee_backing_size,
multiplied_callee_backing_size,
));
}
}
Loading