Skip to content
Snippets Groups Projects
Commit 556e3017 authored by Russel Arbore
Browse files

Refactor CPU backend to support vectorization efforts

parent fe72fd34
No related tags found
No related merge requests found
Pipeline #201696 passed
......@@ -55,8 +55,12 @@ struct CPUContext<'a> {
// Textual LLVM IR accumulated for a single basic block, kept as three separate
// strings so that phis, ordinary instructions, and the terminator can each be
// appended to independently while nodes are being lowered.
#[derive(Default, Debug)]
struct LLVMBlock {
// Phi instructions for this block; these are emitted at the beginning of the
// block.
phis: String,
// The instructions making up the block body.
body: String,
// The block terminator. Phi lowering also writes the vector prefaces for a
// phi's incoming values into the predecessors' `term`, since data flow is
// emitted before control flow.
term: String,
}
......@@ -196,17 +200,20 @@ impl<'a> CPUContext<'a> {
let succ1 = succs.next().unwrap();
let succ2 = succs.next().unwrap();
let succ1_is_true = self.function.nodes[succ1.idx()].try_projection(1).is_some();
let cond_name = self.get_value(cond, VectorVariableState::Scalar, term)?;
write!(
term,
" br {}, label %{}, label %{}\n",
self.get_value(cond, true),
" br i1 {}, label %{}, label %{}\n",
cond_name,
self.get_block_name(if succ1_is_true { succ1 } else { succ2 }),
self.get_block_name(if succ1_is_true { succ2 } else { succ1 }),
)?
}
Node::Return { control: _, data } => {
let term = &mut blocks.get_mut(&id).unwrap().term;
write!(term, " ret {}\n", self.get_value(data, true))?
let data_name = self.get_value(data, VectorVariableState::Scalar, term)?;
let data_ty = self.get_type(self.typing[data.idx()], self.try_vector(id));
write!(term, " ret {} {}\n", data_ty, data_name)?
}
_ => panic!(
"PANIC: Can't lower {:?} in {}.",
......@@ -218,7 +225,10 @@ impl<'a> CPUContext<'a> {
}
/*
* Lower data nodes in Hercules IR into LLVM instructions.
* Lower data nodes in Hercules IR into LLVM instructions. For each node, we
* usually calculate local temporary virtual registers of the inputs before
* using them. This is due to LLVM's textual format being very annoying
* about uses of vector registers and instructions.
*/
fn codegen_data_node(
&self,
......@@ -227,12 +237,19 @@ impl<'a> CPUContext<'a> {
) -> Result<(), Error> {
match self.function.nodes[id.idx()] {
Node::Phi { control, ref data } => {
let phis = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().phis;
let preds = self.function.nodes[control.idx()].try_region().unwrap();
let mut names = vec![];
for (data, pred) in zip(data.into_iter(), preds.into_iter()) {
let pred_term = &mut blocks.get_mut(pred).unwrap().term;
let name = self.get_value(*data, self.vvs[id.idx()], pred_term)?;
names.push(name);
}
let phis = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().phis;
write!(
phis,
" {} = phi {} ",
self.get_value(id, false),
self.get_assign(id),
self.get_type(self.typing[id.idx()], self.try_vector(id))
)?;
for idx in 0..preds.len() {
......@@ -242,7 +259,7 @@ impl<'a> CPUContext<'a> {
write!(
phis,
"[ {}, %{} ]",
self.get_value(data[idx], false),
names[idx],
self.get_block_name(preds[idx])
)?;
}
......@@ -254,7 +271,7 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = bitcast {} %p{} to {}\n",
self.get_value(id, false),
self.get_assign(id),
ty,
index,
ty
......@@ -263,7 +280,7 @@ impl<'a> CPUContext<'a> {
Node::Constant { id: cons_id } => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
if self.constants[cons_id.idx()].is_scalar() {
write!(body, " {} = bitcast ", self.get_value(id, false))?;
write!(body, " {} = bitcast ", self.get_assign(id))?;
match self.constants[cons_id.idx()] {
Constant::Boolean(val) => write!(body, "i1 {} to i1\n", val)?,
Constant::Integer8(val) => write!(body, "i8 {} to i8\n", val)?,
......@@ -292,16 +309,16 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = getelementptr i8, ptr %backing, i64 %dc{}\n",
self.get_value(id, false),
self.get_assign(id),
offset.idx()
)?;
if !self.function.schedules[id.idx()].contains(&Schedule::NoResetConstant) {
let data_size = self.codegen_type_size(self.typing[id.idx()], body)?;
let id_name = self.get_value(id, VectorVariableState::Scalar, body)?;
write!(
body,
" call void @llvm.memset.p0.i64({}, i8 0, i64 {}, i1 false)\n",
self.get_value(id, true),
data_size,
" call void @llvm.memset.p0.i64(ptr {}, i8 0, i64 {}, i1 false)\n",
id_name, data_size,
)?;
}
}
......@@ -313,11 +330,7 @@ impl<'a> CPUContext<'a> {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
// Thread IDs start at 0, and the other values are constructed
// in a vector on use.
write!(
body,
" {} = bitcast i64 0 to i64\n",
self.get_value(id, false),
)?
write!(body, " {} = bitcast i64 0 to i64\n", self.get_assign(id),)?
}
Node::DynamicConstant { id: dc_id } => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
......@@ -326,33 +339,38 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = bitcast i64 %dc{} to i64\n",
self.get_value(id, false),
self.get_assign(id),
dc_id.idx()
)?
}
Node::Unary { op, input } => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let input_name = self.get_value(input, self.vvs[id.idx()], body)?;
let input_ty = self.get_type(self.typing[input.idx()], None);
match op {
UnaryOperator::Not => write!(
body,
" {} = xor {}, -1\n",
self.get_value(id, false),
self.get_value(input, true)
" {} = xor {} {}, -1\n",
self.get_assign(id),
input_ty,
input_name
)?,
UnaryOperator::Neg => {
if self.types[self.typing[input.idx()].idx()].is_float() {
write!(
body,
" {} = fneg {}",
self.get_value(id, false),
self.get_value(input, true)
" {} = fneg {} {}",
self.get_assign(id),
input_ty,
input_name
)?
} else {
write!(
body,
" {} = mul {}, -1",
self.get_value(id, false),
self.get_value(input, true)
" {} = mul {} {}, -1",
self.get_assign(id),
input_ty,
input_name
)?
}
}
......@@ -404,10 +422,11 @@ impl<'a> CPUContext<'a> {
};
write!(
body,
" {} = {} {} to {}\n",
self.get_value(id, false),
" {} = {} {} {} to {}\n",
self.get_assign(id),
opcode,
self.get_value(input, true),
input_ty,
input_name,
self.get_type(dst_ty_id, self.try_vector(id)),
)?
}
......@@ -467,13 +486,17 @@ impl<'a> CPUContext<'a> {
};
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let left_name = self.get_value(left, self.vvs[id.idx()], body)?;
let left_ty = self.get_type(self.typing[left.idx()], None);
let right_name = self.get_value(right, self.vvs[id.idx()], body)?;
write!(
body,
" {} = {} {}, {}\n",
self.get_value(id, false),
" {} = {} {} {}, {}\n",
self.get_assign(id),
opcode,
self.get_value(left, true),
self.get_value(right, false),
left_ty,
left_name,
right_name
)?
}
Node::Ternary {
......@@ -483,14 +506,23 @@ impl<'a> CPUContext<'a> {
third,
} => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let first_name = self.get_value(first, self.vvs[id.idx()], body)?;
let first_ty = self.get_type(self.typing[first.idx()], None);
let second_name = self.get_value(second, self.vvs[id.idx()], body)?;
let second_ty = self.get_type(self.typing[second.idx()], None);
let third_name = self.get_value(third, self.vvs[id.idx()], body)?;
let third_ty = self.get_type(self.typing[third.idx()], None);
match op {
TernaryOperator::Select => write!(
body,
" {} = select {}, {}, {}\n",
self.get_value(id, false),
self.get_value(first, true),
self.get_value(second, true),
self.get_value(third, true)
" {} = select {} {}, {} {}, {} {}\n",
self.get_assign(id),
first_ty,
first_name,
second_ty,
second_name,
third_ty,
third_name
)?,
}
}
......@@ -499,10 +531,14 @@ impl<'a> CPUContext<'a> {
ref args,
} => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let mut arg_names = vec![];
for arg in args {
arg_names.push(self.get_value(*arg, self.vvs[id.idx()], body)?);
}
write!(
body,
" {} = call {} {}(",
self.get_value(id, false),
self.get_assign(id),
self.get_type(self.typing[id.idx()], self.try_vector(id)),
convert_intrinsic(
&intrinsic,
......@@ -514,7 +550,8 @@ impl<'a> CPUContext<'a> {
if idx != 0 {
write!(body, ", ")?;
}
write!(body, "{}", self.get_value(args[idx], true))?;
let arg_ty = self.get_type(self.typing[args[idx].idx()], None);
write!(body, "{} {}", arg_ty, arg_names[idx])?;
}
write!(body, ")\n")?
}
......@@ -522,8 +559,9 @@ impl<'a> CPUContext<'a> {
collect,
ref indices,
} => {
assert!(self.vvs[id.idx()] == VectorVariableState::Scalar);
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let collect_name = self.get_value(collect, false);
let collect_name = self.get_value(collect, VectorVariableState::Scalar, body)?;
let collect_ty = self.typing[collect.idx()];
let index_ptr_name =
self.codegen_index_math(&collect_name, collect_ty, indices, body)?;
......@@ -534,7 +572,7 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = load {}, ptr {}\n",
self.get_value(id, false),
self.get_assign(id),
self.get_type(self_ty, self.try_vector(id)),
index_ptr_name
)?;
......@@ -544,7 +582,7 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = bitcast ptr {} to ptr\n",
self.get_value(id, false),
self.get_assign(id),
index_ptr_name
)?;
}
......@@ -554,20 +592,23 @@ impl<'a> CPUContext<'a> {
data,
ref indices,
} => {
assert!(self.vvs[id.idx()] == VectorVariableState::Scalar);
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let collect_name = self.get_value(collect, false);
let collect_name = self.get_value(collect, VectorVariableState::Scalar, body)?;
let collect_ty = self.typing[collect.idx()];
let data_name = self.get_value(data, VectorVariableState::Scalar, body)?;
let index_ptr_name =
self.codegen_index_math(&collect_name, collect_ty, indices, body)?;
let data_ty = self.typing[data.idx()];
let collect_ty_str = self.get_type(collect_ty, None);
let data_ty_str = self.get_type(data_ty, None);
if self.types[data_ty.idx()].is_primitive() {
// If the data item being written is a primitive type,
// perform a single store of the data value.
write!(
body,
" store {}, ptr {}\n",
self.get_value(data, true),
index_ptr_name
" store {} {}, ptr {}\n",
data_ty_str, data_name, index_ptr_name
)?;
} else {
// If the data item being written is not a primitive type,
......@@ -576,17 +617,16 @@ impl<'a> CPUContext<'a> {
let data_size = self.codegen_type_size(data_ty, body)?;
write!(
body,
" call void @llvm.memcpy.p0.p0.i64(ptr {}, {}, i64 {}, i1 false)\n",
index_ptr_name,
self.get_value(data, true),
data_size
" call void @llvm.memcpy.p0.p0.i64(ptr {}, {} {}, i64 {}, i1 false)\n",
index_ptr_name, data_ty_str, data_name, data_size
)?;
}
write!(
body,
" {} = bitcast {} to ptr\n",
self.get_value(id, false),
self.get_value(collect, true)
" {} = bitcast {} {} to ptr\n",
self.get_assign(id),
collect_ty_str,
collect_name,
)?;
}
Node::Undef { ty } => {
......@@ -595,7 +635,7 @@ impl<'a> CPUContext<'a> {
write!(
body,
" {} = bitcast {} undef to {}\n",
self.get_value(id, false),
self.get_assign(id),
ty,
ty
)?;
......@@ -812,7 +852,7 @@ impl<'a> CPUContext<'a> {
let elem_size = self.codegen_type_size(elem, body)?;
let mut acc_offset = "0".to_string();
for (p, s) in zip(pos, dims) {
let p = self.get_value(*p, false);
let p = self.get_value(*p, VectorVariableState::Scalar, body)?;
let s = format!("%dc{}", s.idx());
acc_offset = Self::multiply(&acc_offset, &s, body)?;
acc_offset = Self::append(&acc_offset, &p, body)?;
......@@ -889,16 +929,27 @@ impl<'a> CPUContext<'a> {
}
}
fn get_value(&self, id: NodeID, ty: bool) -> String {
if ty {
format!(
"{} %v{}",
self.get_type(self.typing[id.idx()], self.try_vector(id)),
id.idx()
)
} else {
format!("%v{}", id.idx())
}
/*
 * Get the name of the virtual register that the lowering of node `id` assigns
 * its result to. The name is `%v` followed by the node's index.
 */
fn get_assign(&self, id: NodeID) -> String {
    let index = id.idx();
    format!("%v{}", index)
}
/*
 * Emit a fresh local temporary holding the value of node `id` and return the
 * temporary's name. The temporary is defined by appending a same-type bitcast
 * of `%v<id>` to `body`, so callers must use the returned name only after the
 * text already in `body`.
 *
 * `need` is the vector state the caller wants the value in. It is not
 * consulted by this body yet.
 */
fn get_value(
    &self,
    id: NodeID,
    need: VectorVariableState,
    body: &mut String,
) -> Result<String, Error> {
    // Reserve the temporary's name first, then work out the LLVM type of the
    // node being read.
    let temp = format!("%gv.{}", Self::gen_filler_id());
    let llvm_ty = self.get_type(self.typing[id.idx()], self.try_vector(id));
    let source = id.idx();
    write!(
        body,
        " {} = bitcast {} %v{} to {}\n",
        temp, llvm_ty, source, llvm_ty
    )?;
    Ok(temp)
}
fn get_block_name(&self, id: NodeID) -> String {
......@@ -979,7 +1030,7 @@ impl<'a> CPUContext<'a> {
}
fn try_vector(&self, id: NodeID) -> Option<usize> {
if let VectorVariableState::Vector(factor) = self.vvs[id.idx()] {
if let VectorVariableState::Vector(factor, _) = self.vvs[id.idx()] {
Some(factor)
} else {
None
......
......@@ -79,10 +79,10 @@ pub enum MonoidReduction {
// The vectorization state recorded for each node by
// `fork_vector_variable_states`. Every non-`Scalar` variant carries a `usize`
// that the call sites fill with the fork's `factor`.
// NOTE(review): this listing is a rendered diff, so each non-`Scalar` variant
// appears twice - first without and then with a `NodeID` field. The updated
// call sites pass the fork's node ID (`*fork`) for that field; confirm against
// the real file which set of variants is live.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VectorVariableState {
Scalar,
ScalarTIDOffset(usize),
Vector(usize),
MonoidReduce(usize, MonoidReduction),
ParallelWrite(usize),
ScalarTIDOffset(usize, NodeID),
Vector(usize, NodeID),
MonoidReduce(usize, NodeID, MonoidReduction),
ParallelWrite(usize, NodeID),
}
pub fn fork_vector_variable_states(
......@@ -120,7 +120,7 @@ pub fn fork_vector_variable_states(
// Start by setting every node in the fork join to have vector state.
for node in nodes_in_fork_joins[fork].iter() {
states[node.idx()] = VectorVariableState::Vector(factor);
states[node.idx()] = VectorVariableState::Vector(factor, *fork);
}
// Reduce cycles are scalar.
......@@ -133,7 +133,7 @@ pub fn fork_vector_variable_states(
// The thread ID is a scalar (zero), offset by the thread ID value.
for tid in tids.iter() {
states[tid.idx()] = VectorVariableState::ScalarTIDOffset(factor);
states[tid.idx()] = VectorVariableState::ScalarTIDOffset(factor, *fork);
}
// Adding a thread ID offset value to a scalar is still a thread ID
......@@ -145,11 +145,11 @@ pub fn fork_vector_variable_states(
right,
} = function.nodes[id.idx()]
&& ((states[left.idx()] == VectorVariableState::Scalar
&& states[right.idx()] == VectorVariableState::ScalarTIDOffset(factor))
|| (states[left.idx()] == VectorVariableState::ScalarTIDOffset(factor)
&& states[right.idx()] == VectorVariableState::ScalarTIDOffset(factor, *fork))
|| (states[left.idx()] == VectorVariableState::ScalarTIDOffset(factor, *fork)
&& states[right.idx()] == VectorVariableState::Scalar))
{
states[id.idx()] = VectorVariableState::ScalarTIDOffset(factor);
states[id.idx()] = VectorVariableState::ScalarTIDOffset(factor, *fork);
}
}
......@@ -195,8 +195,8 @@ pub fn fork_vector_variable_states(
} => MonoidReduction::Min,
_ => panic!(),
};
states[reduce.idx()] = VectorVariableState::MonoidReduce(factor, monoid);
states[reduct.idx()] = VectorVariableState::MonoidReduce(factor, monoid);
states[reduce.idx()] = VectorVariableState::MonoidReduce(factor, *fork, monoid);
states[reduct.idx()] = VectorVariableState::MonoidReduce(factor, *fork, monoid);
}
// Identify parallel reduces and set their vector state.
......@@ -212,8 +212,8 @@ pub fn fork_vector_variable_states(
} = function.nodes[reduct.idx()]
&& collect == *reduce
{
states[reduce.idx()] = VectorVariableState::ParallelWrite(factor);
states[reduct.idx()] = VectorVariableState::ParallelWrite(factor);
states[reduce.idx()] = VectorVariableState::ParallelWrite(factor, *fork);
states[reduct.idx()] = VectorVariableState::ParallelWrite(factor, *fork);
}
}
}
......
......@@ -31,10 +31,11 @@ gvn(dot);
dce(dot);
infer-schedules(dot);
unforkify-one(out);
//unforkify-one(out);
unforkify(out);
ccp(out);
simplify-cfg(out);
gvn(out);
dce(out);
xdot[true](*);
//xdot[true](*);
gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment