Skip to content
Snippets Groups Projects
Commit f64bea72 authored by rarbore2's avatar rarbore2
Browse files

Anti-dependencies rewrite

parent 7b8363fe
No related branches found
No related tags found
1 merge request!90Anti-dependencies rewrite
......@@ -24,4 +24,5 @@ members = [
"juno_samples/matmul",
"juno_samples/casts_and_intrinsics",
"juno_samples/nested_ccp",
#"juno_samples/implicit_clone",
]
......@@ -27,6 +27,7 @@ pub fn cpu_codegen<W: Write>(
reverse_postorder: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
antideps: &Vec<(NodeID, NodeID)>,
bbs: &Vec<NodeID>,
w: &mut W,
) -> Result<(), Error> {
......@@ -38,6 +39,7 @@ pub fn cpu_codegen<W: Write>(
reverse_postorder,
typing,
control_subgraph,
antideps,
bbs,
};
ctx.codegen_function(w)
......@@ -51,6 +53,7 @@ struct CPUContext<'a> {
reverse_postorder: &'a Vec<NodeID>,
typing: &'a Vec<TypeID>,
control_subgraph: &'a Subgraph,
antideps: &'a Vec<(NodeID, NodeID)>,
bbs: &'a Vec<NodeID>,
}
......@@ -128,6 +131,7 @@ impl<'a> CPUContext<'a> {
.filter(|id| !self.function.nodes[id.idx()].is_control()),
);
let mut visited = bitvec![u8, Lsb0; 0; self.function.nodes.len()];
let antideps = flip_antideps(&self.antideps);
while let Some(id) = worklist.pop_front() {
let node = &self.function.nodes[id.idx()];
if node.is_phi()
......@@ -135,7 +139,12 @@ impl<'a> CPUContext<'a> {
|| get_uses(node)
.as_ref()
.into_iter()
.all(|u| self.function.nodes[u.idx()].is_control() || visited[u.idx()])
.chain(antideps.get(&id).into_iter().flatten())
.all(|u| {
self.function.nodes[u.idx()].is_control()
|| self.bbs[u.idx()] != self.bbs[id.idx()]
|| visited[u.idx()]
})
{
self.codegen_data_node(*id, &mut blocks)?;
visited.set(id.idx(), true);
......
......@@ -22,6 +22,7 @@ pub fn rt_codegen<W: Write>(
reverse_postorder: &Vec<NodeID>,
typing: &Vec<TypeID>,
control_subgraph: &Subgraph,
antideps: &Vec<(NodeID, NodeID)>,
bbs: &Vec<NodeID>,
collection_objects: &CollectionObjects,
callgraph: &CallGraph,
......@@ -34,6 +35,7 @@ pub fn rt_codegen<W: Write>(
reverse_postorder,
typing,
control_subgraph,
antideps,
bbs,
collection_objects,
callgraph,
......@@ -48,6 +50,7 @@ struct RTContext<'a> {
reverse_postorder: &'a Vec<NodeID>,
typing: &'a Vec<TypeID>,
control_subgraph: &'a Subgraph,
antideps: &'a Vec<(NodeID, NodeID)>,
bbs: &'a Vec<NodeID>,
collection_objects: &'a CollectionObjects,
callgraph: &'a CallGraph,
......@@ -218,6 +221,7 @@ impl<'a> RTContext<'a> {
.filter(|id| !func.nodes[id.idx()].is_control()),
);
let mut visited = bitvec![u8, Lsb0; 0; func.nodes.len()];
let antideps = flip_antideps(&self.antideps);
while let Some(id) = worklist.pop_front() {
let node = &func.nodes[id.idx()];
if node.is_phi()
......@@ -225,7 +229,12 @@ impl<'a> RTContext<'a> {
|| get_uses(node)
.as_ref()
.into_iter()
.all(|u| func.nodes[u.idx()].is_control() || visited[u.idx()])
.chain(antideps.get(&id).into_iter().flatten())
.all(|u| {
func.nodes[u.idx()].is_control()
|| self.bbs[u.idx()] != self.bbs[id.idx()]
|| visited[u.idx()]
})
{
self.codegen_data_node(*id, &mut blocks)?;
visited.set(id.idx(), true);
......
use std::collections::{BTreeMap, BTreeSet};
use std::iter::zip;
use crate::*;
/*
* In addition to collections, we need to figure out which "generation" of a
* collection a node may take as input.
*/
#[derive(PartialEq, Eq, Clone, Debug)]
struct GenerationLattice {
    // Set of (collection object, generation) pairs that may reach a node. A
    // "generation" is represented by the NodeID of the node that originated or
    // last mutated the object (parameter, constant, undef, call, or write).
    objs: BTreeSet<(CollectionObjectID, NodeID)>,
}
impl Semilattice for GenerationLattice {
    // Meet is set union: after a control merge, a node may observe any
    // (object, generation) pair arriving along either path.
    fn meet(a: &Self, b: &Self) -> Self {
        GenerationLattice {
            objs: a.objs.union(&b.objs).map(|x| *x).collect(),
        }
    }
    // Top is the empty set - no (object, generation) pairs known yet.
    fn top() -> Self {
        GenerationLattice {
            objs: BTreeSet::new(),
        }
    }
    fn bottom() -> Self {
        // Bottom is not representable for this lattice with our Semilattice
        // interface, but we never need to construct it.
        panic!()
    }
}
/*
 * Function to assemble anti-dependence edges. Returns a list of pairs of nodes.
 * The first item in the pair is the reading node, and the second item is the
 * mutating node.
 */
pub fn antideps(function: &Function, def_use: &ImmutableDefUseMap) -> Vec<(NodeID, NodeID)> {
// Anti-dependence edges are between a write node and a read node, where
// each node uses the same array value. The read must be scheduled before
// the write to avoid incorrect compilation.
let mut antideps = vec![];
pub fn antideps(
function: &Function,
reverse_postorder: &Vec<NodeID>,
objects: &FunctionCollectionObjects,
) -> Vec<(NodeID, NodeID)> {
// First, we analyze "generations" of collections as they are mutated.
// Originating nodes and mutating nodes start a new generation of a
// collection. Generations are not ordered due to loops, but are rather just
// node IDs of the originating or mutating node (parameter, constant, call,
// undef, write). Other nodes operating on collections mean reads / writes
// can operate on potentially different generations of multiple collections
// (phi, reduce, select).
let lattice = forward_dataflow(function, reverse_postorder, |inputs, id| {
match function.nodes[id.idx()] {
Node::Phi {
control: _,
data: _,
}
| Node::Reduce {
control: _,
init: _,
reduct: _,
}
| Node::Ternary {
op: TernaryOperator::Select,
first: _,
second: _,
third: _,
} => inputs
.into_iter()
.fold(GenerationLattice::top(), |acc, input| {
GenerationLattice::meet(&acc, input)
}),
Node::Parameter { index: _ } | Node::Constant { id: _ } | Node::Undef { ty: _ } => {
let objs = objects.objects(id);
GenerationLattice {
objs: objs.into_iter().map(|obj| (*obj, id)).collect(),
}
}
Node::Call {
control: _,
function: _,
dynamic_constants: _,
ref args,
} => {
let mut objs = BTreeSet::new();
let call_objs = objects.objects(id);
// If this call node might originate an object, add that to the
// lattice output - its generation is this call node.
for obj in call_objs {
if objects.origin(*obj) == CollectionObjectOrigin::Call(id) {
assert!(objs.len() <= 1);
objs.insert((*obj, id));
}
}
for id in (0..function.nodes.len()).map(NodeID::new) {
// Collect the reads and writes to / from this collection.
let users = def_use.get_users(id);
let reads = users.iter().filter(|user| {
if let Node::Read {
// For every argument...
for (arg, arg_gens) in zip(args, inputs.into_iter().skip(1)) {
// Look at its objects...
for arg_obj in objects.objects(*arg) {
// For each object that might be returned...
if call_objs.contains(&arg_obj) {
let mutable = objects.mutators(*arg_obj).contains(&id);
for (obj, gen) in arg_gens.objs.iter() {
// Add that object to the output lattice.
if obj == arg_obj && mutable {
// Set the generation to this node if the
// object might be mutated.
objs.insert((*obj, id));
} else if obj == arg_obj {
// Otherwise, keep the old generation.
objs.insert((*obj, *gen));
}
}
}
}
}
GenerationLattice { objs }
}
Node::Read {
collect: _,
indices: _,
} => inputs[0].clone(),
Node::Write {
collect: _,
data: _,
indices: _,
} => {
// Writes update the generation to the write.
let objs = inputs[0].objs.iter().map(|(obj, _)| (*obj, id)).collect();
GenerationLattice { objs }
}
_ => GenerationLattice::top(),
}
});
// Second, we generate anti-dependence edges from the dataflow analysis.
// There are three cases where an anti-dependence edge is generated:
//
// 1. A read node and a write node share an object and generation pair on
// their `collect` input.
// 2. A read node and a call node share an object and generation pair, where
// the pair is on the read's `collect` input and the pair is on any input
// of the call node AND the call node is a mutator of the object.
// 3. A call node and a write node share an object and generation pair,
// where the pair is on any input of the call node and the pair is on the
// write's `collect` input.
let mut reads_writes_calls_mut_calls_per_pair: BTreeMap<
(CollectionObjectID, NodeID),
(Vec<NodeID>, Vec<NodeID>, Vec<NodeID>, Vec<NodeID>),
> = BTreeMap::new();
for (idx, node) in function.nodes.iter().enumerate() {
let id = NodeID::new(idx);
match node {
Node::Read {
collect,
indices: _,
} = function.nodes[user.idx()]
{
collect == id
} else {
false
}
});
let mut writes = users.iter().filter(|user| {
if let Node::Write {
} => {
for pair in lattice[collect.idx()].objs.iter() {
reads_writes_calls_mut_calls_per_pair
.entry(*pair)
.or_default()
.0
.push(id);
}
}
Node::Write {
collect,
data: _,
indices: _,
} = function.nodes[user.idx()]
{
collect == id
} else {
false
} => {
for pair in lattice[collect.idx()].objs.iter() {
reads_writes_calls_mut_calls_per_pair
.entry(*pair)
.or_default()
.1
.push(id);
}
}
Node::Call {
control: _,
function: _,
dynamic_constants: _,
ref args,
} => {
for arg in args {
for pair in lattice[arg.idx()].objs.iter() {
if objects.mutators(pair.0).contains(&id) {
reads_writes_calls_mut_calls_per_pair
.entry(*pair)
.or_default()
.3
.push(id);
} else {
reads_writes_calls_mut_calls_per_pair
.entry(*pair)
.or_default()
.2
.push(id);
}
}
}
}
});
_ => {}
}
}
// If there are any writes, compute the anti dependence edges.
if let Some(write) = writes.next() {
for read in reads {
// Once we've matched reads / writes / calls by object and generation pair,
// the pair itself no longer matters.
let mut antideps = vec![];
for (_, (reads, writes, calls, mut_calls)) in reads_writes_calls_mut_calls_per_pair {
// Case 1:
for read in reads.iter() {
for write in writes.iter() {
antideps.push((*read, *write));
}
}
// Case 2:
for read in reads.iter() {
for mut_call in mut_calls.iter() {
antideps.push((*read, *mut_call));
}
}
// Case 3:
for call in calls.iter().chain(mut_calls.iter()) {
for write in writes.iter() {
antideps.push((*call, *write));
}
}
}
antideps
}
/*
* Utility to make a map from node to anti-dependency uses (map mutator ->
* reads).
*/
/*
 * Utility to make a map from node to anti-dependency uses (map mutator ->
 * reads).
 */
pub fn flip_antideps(antideps: &Vec<(NodeID, NodeID)>) -> BTreeMap<NodeID, Vec<NodeID>> {
    // Fold each (read, mutator) edge into a mutator -> [reads] multimap.
    antideps
        .iter()
        .fold(BTreeMap::new(), |mut flipped, (read, mutator)| {
            flipped.entry(*mutator).or_insert_with(Vec::new).push(*read);
            flipped
        })
}
extern crate bitvec;
use std::collections::{BTreeMap, BTreeSet};
use self::bitvec::prelude::*;
use crate::*;
/*
......@@ -33,7 +29,7 @@ use crate::*;
* - For each node in each function, which collection objects may be on the
* output of the node?
* - For each function, which collection objects may be mutated inside that
* function?
* function, and by what nodes?
* - For each function, which collection objects may be returned?
* - For each collection object, how was it originated?
*/
......@@ -50,7 +46,7 @@ define_id_type!(CollectionObjectID);
#[derive(Debug, Clone)]
pub struct FunctionCollectionObjects {
objects_per_node: Vec<Vec<CollectionObjectID>>,
mutated: BitVec<u8, Lsb0>,
mutated: Vec<Vec<NodeID>>,
returned: Vec<CollectionObjectID>,
origins: Vec<CollectionObjectOrigin>,
}
......@@ -58,7 +54,7 @@ pub struct FunctionCollectionObjects {
pub type CollectionObjects = BTreeMap<FunctionID, FunctionCollectionObjects>;
impl CollectionObjectOrigin {
fn try_parameter(&self) -> Option<usize> {
pub fn try_parameter(&self) -> Option<usize> {
match self {
CollectionObjectOrigin::Parameter(index) => Some(*index),
_ => None,
......@@ -87,7 +83,11 @@ impl FunctionCollectionObjects {
}
pub fn is_mutated(&self, object: CollectionObjectID) -> bool {
self.mutated[object.idx()]
!self.mutators(object).is_empty()
}
    /*
     * Returns the list of nodes that may mutate the given object, as recorded
     * in the per-object `mutated` table.
     */
    pub fn mutators(&self, object: CollectionObjectID) -> &Vec<NodeID> {
        &self.mutated[object.idx()]
    }
pub fn num_objects(&self) -> usize {
......@@ -309,13 +309,13 @@ pub fn collection_objects(
let returned = returned.into_iter().collect();
// Determine which objects are potentially mutated.
let mut mutated = bitvec![u8, Lsb0; 0; origins.len()];
let mut mutated = vec![vec![]; origins.len()];
for (idx, node) in func.nodes.iter().enumerate() {
if node.is_write() {
// Every object that the write itself corresponds to is mutable
// in this function.
for object in objects_per_node[idx].iter() {
mutated.set(object.idx(), true);
mutated[object.idx()].push(NodeID::new(idx));
}
} else if let Some((_, callee, _, args)) = node.try_call() {
let fco = &collection_objects[&callee];
......@@ -328,7 +328,7 @@ pub fn collection_objects(
// Then every object corresponding to the argument node
// in this function is mutable.
for object in objects_per_node[arg.idx()].iter() {
mutated.set(object.idx(), true);
mutated[object.idx()].push(NodeID::new(idx));
}
}
}
......
......@@ -10,6 +10,7 @@ pub mod forkify;
pub mod gvn;
pub mod inline;
pub mod interprocedural_sroa;
pub mod materialize_clones;
pub mod outline;
pub mod pass;
pub mod phi_elim;
......@@ -29,6 +30,7 @@ pub use crate::forkify::*;
pub use crate::gvn::*;
pub use crate::inline::*;
pub use crate::interprocedural_sroa::*;
pub use crate::materialize_clones::*;
pub use crate::outline::*;
pub use crate::pass::*;
pub use crate::phi_elim::*;
......
extern crate hercules_ir;
use self::hercules_ir::*;
use crate::*;
/*
* Top level function to materialize clones of collections. This transformation
* eliminates the possibility of multiple independent writes (including dynamic
* writes) to a single collection by introducing extra collection constants and
* inserting explicit clones. This allows us to make the simplifying assumption
* in the backend that collections have reference, rather than value, semantics.
* The pass calling this function is mandatory for correctness.
*/
pub fn materialize_clones(editor: &mut FunctionEditor, objects: &FunctionCollectionObjects) {
    // Not yet implemented - running this pass currently panics. The pass
    // plumbing (Pass::MaterializeClones) is in place so the implementation can
    // land separately.
    todo!()
}
......@@ -38,6 +38,7 @@ pub enum Pass {
DeleteUncalled,
ForkSplit,
Unforkify,
MaterializeClones,
InferSchedules,
Verify,
// Parameterized over whether analyses that aid visualization are necessary.
......@@ -239,13 +240,21 @@ impl PassManager {
pub fn make_antideps(&mut self) {
if self.antideps.is_none() {
self.make_def_uses();
self.make_reverse_postorders();
self.make_collection_objects();
self.antideps = Some(
zip(
self.def_uses.as_ref().unwrap().iter(),
self.module.functions.iter(),
zip(
self.reverse_postorders.as_ref().unwrap().iter(),
self.collection_objects.as_ref().unwrap().iter(),
),
)
.map(|(def_use, function)| antideps(function, def_use))
// Fine since collection_objects is a BTreeMap - iteration order
// is fixed.
.map(|(function, (reverse_postorder, objects))| {
antideps(function, reverse_postorder, objects.1)
})
.collect(),
);
}
......@@ -790,6 +799,34 @@ impl PassManager {
}
self.clear_analyses();
}
Pass::MaterializeClones => {
self.make_def_uses();
self.make_collection_objects();
let def_uses = self.def_uses.as_ref().unwrap();
let collection_objects = self.collection_objects.as_ref().unwrap();
for idx in 0..self.module.functions.len() {
let constants_ref =
RefCell::new(std::mem::take(&mut self.module.constants));
let dynamic_constants_ref =
RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
let mut editor = FunctionEditor::new(
&mut self.module.functions[idx],
&constants_ref,
&dynamic_constants_ref,
&types_ref,
&def_uses[idx],
);
materialize_clones(&mut editor, &collection_objects[&FunctionID::new(idx)]);
self.module.constants = constants_ref.take();
self.module.dynamic_constants = dynamic_constants_ref.take();
self.module.types = types_ref.take();
self.module.functions[idx].delete_gravestones();
}
self.clear_analyses();
}
Pass::InferSchedules => {
self.make_def_uses();
self.make_fork_join_maps();
......@@ -862,12 +899,14 @@ impl PassManager {
self.make_reverse_postorders();
self.make_typing();
self.make_control_subgraphs();
self.make_antideps();
self.make_bbs();
self.make_collection_objects();
self.make_callgraph();
let reverse_postorders = self.reverse_postorders.as_ref().unwrap();
let typing = self.typing.as_ref().unwrap();
let control_subgraphs = self.control_subgraphs.as_ref().unwrap();
let antideps = self.antideps.as_ref().unwrap();
let bbs = self.bbs.as_ref().unwrap();
let collection_objects = self.collection_objects.as_ref().unwrap();
let callgraph = self.callgraph.as_ref().unwrap();
......@@ -886,6 +925,7 @@ impl PassManager {
&reverse_postorders[idx],
&typing[idx],
&control_subgraphs[idx],
&antideps[idx],
&bbs[idx],
&mut llvm_ir,
)
......@@ -896,6 +936,7 @@ impl PassManager {
&reverse_postorders[idx],
&typing[idx],
&control_subgraphs[idx],
&antideps[idx],
&bbs[idx],
&collection_objects,
&callgraph,
......@@ -964,8 +1005,11 @@ impl PassManager {
self.fork_join_maps = None;
self.fork_join_nests = None;
self.loops = None;
self.reduce_cycles = None;
self.antideps = None;
self.data_nodes_in_fork_joins = None;
self.bbs = None;
self.collection_objects = None;
self.callgraph = None;
}
......
[package]
name = "juno_implicit_clone"
version = "0.1.0"
authors = ["Russel Arbore <rarbore2@illinois.edu>"]
edition = "2021"
[[bin]]
name = "juno_implicit_clone"
path = "src/main.rs"
[build-dependencies]
juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
with_builtin_macros = "0.1.0"
async-std = "*"
extern crate juno_build;
use juno_build::JunoCompiler;
fn main() {
    // Build script: compile the Juno source in src/ at build time so the
    // generated code can be pulled in by the juno! macro in main.rs.
    JunoCompiler::new()
        .file_in_src("implicit_clone.jn")
        .unwrap()
        .build()
        .unwrap();
}
#[entry]
fn antideps(a : usize, b : usize) -> i32 {
  // Read arr[b] both before and after writing arr[a]: when a == b, the first
  // read must be scheduled before the write (an anti-dependence), while the
  // second read observes the written value.
  let arr : i32[3];
  let r = arr[b];
  arr[a] = 5;
  // NOTE(review): assumes a fresh i32[3] is zero-initialized — the driver's
  // expected output of 5 for antideps(1, 1) relies on r == 0; confirm against
  // Juno array-initialization semantics.
  return r + arr[b];
}
#[entry]
fn implicit_clone(input : i32) -> i32 {
  // `let arr2 = arr` followed by independent writes to both arrays forces the
  // compiler to materialize a clone so arr and arr2 behave as separate values.
  let arr : i32[3];
  arr[0] = 2;
  let arr2 = arr;
  arr2[1] = input;
  arr[2] = 4;
  // Sums elements of both arrays to observe whether the writes stayed
  // independent (value semantics) or aliased (reference semantics).
  return arr[0] + arr2[0] + arr[1] + arr2[1] + arr[2] + arr2[2];
}
#![feature(future_join, box_as_ptr)]
extern crate async_std;
extern crate juno_build;
juno_build::juno!("implicit_clone");
fn main() {
    // Driver for the compiled Juno entry points; each call returns a future,
    // so run them on the async-std executor.
    async_std::task::block_on(async {
        let output = antideps(1, 1).await;
        println!("{}", output);
        assert_eq!(output, 5);
        let output = implicit_clone(3).await;
        println!("{}", output);
        // NOTE(review): with value (clone) semantics the sum appears to be
        // 2+2+0+3+4+0 = 11, not 9 — confirm the intended expected result.
        assert_eq!(output, 9);
    });
}
#[test]
fn implicit_clone_test() {
    // Smoke test: run the driver, which asserts on both entry points' outputs.
    main();
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment