Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Commits on Source (2)
Showing
with 750 additions and 107 deletions
# Highlight juno source files like they're rust source files
*.jn gitlab-language=rust
......@@ -1119,6 +1119,31 @@ dependencies = [
"with_builtin_macros",
]
[[package]]
name = "juno_backprop"
version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"nom 6.2.2",
"rand 0.9.0",
"with_builtin_macros",
]
[[package]]
name = "juno_bfs"
version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"nom 6.2.2",
"with_builtin_macros",
]
[[package]]
name = "juno_build"
version = "0.1.0"
......@@ -1150,6 +1175,18 @@ dependencies = [
"with_builtin_macros",
]
[[package]]
name = "juno_cfd"
version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"nom 6.2.2",
"with_builtin_macros",
]
[[package]]
name = "juno_concat"
version = "0.1.0"
......@@ -1321,6 +1358,18 @@ dependencies = [
"with_builtin_macros",
]
[[package]]
name = "juno_srad"
version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"nom 6.2.2",
"with_builtin_macros",
]
[[package]]
name = "juno_utils"
version = "0.1.0"
......
......@@ -28,6 +28,10 @@ members = [
"juno_samples/multi_device",
"juno_samples/patterns",
"juno_samples/products",
"juno_samples/rodinia/backprop",
"juno_samples/rodinia/bfs",
"juno_samples/rodinia/cfd",
"juno_samples/rodinia/srad",
"juno_samples/schedule_test",
"juno_samples/simple3",
"juno_scheduler",
......
......@@ -964,7 +964,17 @@ fn convert_type(ty: &Type) -> &'static str {
fn convert_intrinsic(intrinsic: &Intrinsic, ty: &Type) -> String {
let intrinsic = match intrinsic {
Intrinsic::Abs => "abs",
Intrinsic::Abs => {
if ty.is_float() {
"fabs"
} else if ty.is_signed() {
"abs"
} else if ty.is_unsigned() {
panic!("llvm doesn't define abs for unsigned integers")
} else {
panic!()
}
},
Intrinsic::ACos => "acos",
Intrinsic::ASin => "asin",
Intrinsic::ATan => "atan",
......
......@@ -2004,7 +2004,7 @@ extern \"C\" {} {}(",
fn codegen_intrinsic(&self, intrinsic: &Intrinsic, ty: &Type) -> String {
let func_name = match intrinsic {
Intrinsic::Abs => match ty {
Type::Float32 => "__fabsf",
Type::Float32 => "fabsf",
Type::Float64 => "__fabs",
ty if ty.is_signed() => "abs",
ty if ty.is_unsigned() => "uabs",
......
......@@ -180,6 +180,13 @@ pub fn outline(
editor.edit(|mut edit| {
// Step 2: assemble the outlined function.
let u32_ty = edit.add_type(Type::UnsignedInteger32);
let return_types: Box<[_]> = return_idx_to_inside_id
.iter()
.map(|id| typing[id.idx()])
.chain(callee_succ_return_idx.map(|_| u32_ty))
.collect();
let single_return = return_types.len() == 1;
let mut outlined = Function {
name: format!(
"{}_{}",
......@@ -191,13 +198,11 @@ pub fn outline(
.map(|id| typing[id.idx()])
.chain(callee_pred_param_idx.map(|_| u32_ty))
.collect(),
return_type: edit.add_type(Type::Product(
return_idx_to_inside_id
.iter()
.map(|id| typing[id.idx()])
.chain(callee_succ_return_idx.map(|_| u32_ty))
.collect(),
)),
return_type: if single_return {
return_types[0]
} else {
edit.add_type(Type::Product(return_types))
},
num_dynamic_constants: edit.get_num_dynamic_constant_params(),
entry: false,
nodes: vec![],
......@@ -393,18 +398,24 @@ pub fn outline(
data_ids.push(cons_node_id);
}
// Build the return product.
let mut construct_id = NodeID::new(outlined.nodes.len());
outlined.nodes.push(Node::Constant { id: cons_id });
for (idx, data) in data_ids.into_iter().enumerate() {
let write = Node::Write {
collect: construct_id,
data: data,
indices: Box::new([Index::Field(idx)]),
};
construct_id = NodeID::new(outlined.nodes.len());
outlined.nodes.push(write);
}
// Build the return value
let construct_id = if single_return {
assert!(data_ids.len() == 1);
data_ids.pop().unwrap()
} else {
let mut construct_id = NodeID::new(outlined.nodes.len());
outlined.nodes.push(Node::Constant { id: cons_id });
for (idx, data) in data_ids.into_iter().enumerate() {
let write = Node::Write {
collect: construct_id,
data: data,
indices: Box::new([Index::Field(idx)]),
};
construct_id = NodeID::new(outlined.nodes.len());
outlined.nodes.push(write);
}
construct_id
};
// Return the return product.
outlined.nodes.push(Node::Return {
......@@ -505,16 +516,20 @@ pub fn outline(
};
// Create the read nodes from the call node to get the outputs of the
// outlined function.
let output_reads: Vec<_> = (0..return_idx_to_inside_id.len())
.map(|idx| {
let read = Node::Read {
collect: call_id,
indices: Box::new([Index::Field(idx)]),
};
edit.add_node(read)
})
.collect();
// outlined function (if there are multiple returned values)
let output_reads: Vec<_> = if single_return {
vec![call_id]
} else {
(0..return_idx_to_inside_id.len())
.map(|idx| {
let read = Node::Read {
collect: call_id,
indices: Box::new([Index::Field(idx)]),
};
edit.add_node(read)
})
.collect()
};
let indicator_read = callee_succ_return_idx.map(|idx| {
let read = Node::Read {
collect: call_id,
......
......@@ -38,14 +38,33 @@ use crate::*;
*
* - Write: the write node writes primitive fields in product values - these get
* replaced by a direct def of the field value
*
* The allow_sroa_arrays variable controls whether products that contain arrays
* will be broken into pieces. This option is useful to have since breaking
* these products up can be expensive if it requires destructing and
* reconstructing the product at any point.
*
* TODO: Handle partial selections (i.e. immutable nodes). This will involve
* actually tracking each source and use of a product and verifying that all of
* the nodes involved are mutable.
*/
pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types: &Vec<TypeID>) {
pub fn sroa(
editor: &mut FunctionEditor,
reverse_postorder: &Vec<NodeID>,
types: &Vec<TypeID>,
allow_sroa_arrays: bool,
) {
let mut types: HashMap<NodeID, TypeID> = types
.iter()
.enumerate()
.map(|(i, t)| (NodeID::new(i), *t))
.collect();
let can_sroa_type = |editor: &FunctionEditor, typ: TypeID| {
editor.get_type(typ).is_product()
&& (allow_sroa_arrays || !type_contains_array(editor, typ))
};
// This map stores a map from NodeID to an index tree which can be used to lookup the NodeID
// that contains the corresponding fields of the original value
let mut field_map: HashMap<NodeID, IndexTree<NodeID>> = HashMap::new();
......@@ -67,7 +86,7 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
second: _,
third: _,
op: TernaryOperator::Select,
} if editor.get_type(types[&node]).is_product() => product_nodes.push(*node),
} if can_sroa_type(editor, types[&node]) => product_nodes.push(*node),
Node::Write {
collect,
......@@ -83,19 +102,23 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
let mut fields = vec![];
let mut remainder = vec![];
let mut indices = indices.iter();
while let Some(idx) = indices.next() {
if idx.is_field() {
fields.push(idx.clone());
} else {
remainder.push(idx.clone());
remainder.extend(indices.cloned());
break;
if can_sroa_type(editor, types[&node]) {
let mut indices = indices.iter();
while let Some(idx) = indices.next() {
if idx.is_field() {
fields.push(idx.clone());
} else {
remainder.push(idx.clone());
remainder.extend(indices.cloned());
break;
}
}
} else {
remainder.extend_from_slice(indices);
}
if fields.is_empty() {
if editor.get_type(types[&data]).is_product() {
if can_sroa_type(editor, types[&data]) {
(None, Some((*node, collect, remainder)))
} else {
(None, None)
......@@ -205,9 +228,13 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
// that information to the node map for the rest of SROA (this produces some reads
// that mix types of indices, since we only read leaves but that's okay since those
// reads are not handled by SROA)
let indices = indices
.chunk_by(|i, j| i.is_field() && j.is_field())
.collect::<Vec<_>>();
let indices = if can_sroa_type(editor, types[collect]) {
indices
.chunk_by(|i, j| i.is_field() == j.is_field())
.collect::<Vec<_>>()
} else {
vec![indices.as_ref()]
};
let (field_reads, non_fields_produce_prod) = {
if indices.len() == 0 {
......@@ -217,9 +244,9 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
} else if indices.len() == 1 {
// If once we perform chunking there's only one set of indices, we can just
// use the original node
if indices[0][0].is_field() {
if can_sroa_type(editor, types[collect]) {
(vec![*node], vec![])
} else if editor.get_type(types[node]).is_product() {
} else if can_sroa_type(editor, types[node]) {
(vec![], vec![*node])
} else {
(vec![], vec![])
......@@ -278,7 +305,7 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
// We add all calls to the call/return list and check their arguments later
Node::Call { .. } => call_return_nodes.push(*node),
Node::Return { control: _, data } if editor.get_type(types[&data]).is_product() => {
Node::Return { control: _, data } if can_sroa_type(editor, types[&data]) => {
call_return_nodes.push(*node)
}
......@@ -296,7 +323,7 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
for node in call_return_nodes {
match &editor.func().nodes[node.idx()] {
Node::Return { control, data } => {
assert!(editor.get_type(types[&data]).is_product());
assert!(can_sroa_type(editor, types[&data]));
let control = *control;
let new_data = reconstruct_product(editor, types[&data], *data, &mut product_nodes);
editor.edit(|mut edit| {
......@@ -319,8 +346,8 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
let dynamic_constants = dynamic_constants.clone();
let args = args.clone();
// If the call returns a product, we generate reads for each field
let fields = if editor.get_type(types[&node]).is_product() {
// If the call returns a product that we can sroa, we generate reads for each field
let fields = if can_sroa_type(editor, types[&node]) {
Some(generate_reads(editor, types[&node], node))
} else {
None
......@@ -328,7 +355,7 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
let mut new_args = vec![];
for arg in args {
if editor.get_type(types[&arg]).is_product() {
if can_sroa_type(editor, types[&arg]) {
new_args.push(reconstruct_product(
editor,
types[&arg],
......@@ -489,7 +516,7 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
indices,
} => {
if let Some(index_map) = field_map.get(collect) {
if editor.get_type(types[&data]).is_product() {
if can_sroa_type(editor, types[&data]) {
if let Some(data_idx) = field_map.get(data) {
field_map.insert(
node,
......@@ -698,6 +725,16 @@ pub fn sroa(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, types:
});
}
fn type_contains_array(editor: &FunctionEditor, typ: TypeID) -> bool {
match &*editor.get_type(typ) {
Type::Array(_, _) => true,
Type::Product(ts) | Type::Summation(ts) => {
ts.iter().any(|t| type_contains_array(editor, *t))
}
_ => false,
}
}
// An index tree is used to store results at many index lists
#[derive(Clone, Debug)]
pub enum IndexTree<T> {
......
......@@ -284,14 +284,6 @@ impl<'a> HerculesCUDARefMut<'a> {
}
}
pub fn dup(&'a mut self) -> Self {
HerculesCUDARefMut {
ptr: self.ptr,
size: self.size,
_phantom: PhantomData,
}
}
pub unsafe fn __ptr(&self) -> *mut u8 {
self.ptr.as_ptr()
}
......@@ -309,6 +301,17 @@ impl<'a> HerculesCUDARefMut<'a> {
}
}
#[cfg(feature = "cuda")]
impl<'a, 'b: 'a> HerculesCUDARefMut<'b> {
pub fn dup(&'a mut self) -> HerculesCUDARefMut<'a> {
HerculesCUDARefMut {
ptr: self.ptr,
size: self.size,
_phantom: PhantomData,
}
}
}
#[cfg(feature = "cuda")]
impl CUDABox {
pub fn from_cpu_ref(cpu_ref: HerculesCPURef) -> Self {
......@@ -662,7 +665,7 @@ impl<'a, T> From<HerculesCUDARefMut<'a>> for HerculesMutBox<'a, T> {
}
}
impl<'a, T> HerculesMutBox<'a, T>
impl<'a, 'b: 'a, T> HerculesMutBox<'b, T>
where
T: Default + Clone,
{
......@@ -688,7 +691,7 @@ where
let elements = unsafe { cuda_ref.__size() / size_of::<T>() };
// Allocate host memory (if needed)
let cpu_alloc: Allocation<&'a mut [T], Vec<T>> = match self.cpu_alloc.take() {
let cpu_alloc: Allocation<&'b mut [T], Vec<T>> = match self.cpu_alloc.take() {
Allocation::Reference(val) if val.len() == elements => {
Allocation::Reference(val)
}
......@@ -793,7 +796,7 @@ pub trait HerculesMutBoxTo<'a, T> {
fn to(&'a mut self) -> T;
}
impl<'a, T> HerculesMutBoxTo<'a, HerculesCPURefMut<'a>> for HerculesMutBox<'a, T>
impl<'a, 'b: 'a, T> HerculesMutBoxTo<'a, HerculesCPURefMut<'a>> for HerculesMutBox<'b, T>
where
T: Default + Clone,
{
......@@ -803,7 +806,7 @@ where
}
#[cfg(feature = "cuda")]
impl<'a, T> HerculesMutBoxTo<'a, HerculesCUDARefMut<'a>> for HerculesMutBox<'a, T>
impl<'a, 'b: 'a, T> HerculesMutBoxTo<'a, HerculesCUDARefMut<'a>> for HerculesMutBox<'b, T>
where
T: Default + Clone,
{
......
......@@ -540,25 +540,31 @@ impl CodeGenerator<'_> {
block = after_call_region;
// Read each of the "inout values" and perform the SSA update
let inouts_index = self.builder.builder.create_field_index(1);
let has_inouts = !inouts.is_empty();
// TODO: We should omit unit returns; if we do so, the + 1 below is not needed
for (idx, var) in inouts.into_iter().enumerate() {
let index = self.builder.builder.create_field_index(idx);
let index = self.builder.builder.create_field_index(idx + 1);
let mut read = self.builder.allocate_node();
let read_id = read.id();
read.build_read(call_id, vec![inouts_index.clone(), index].into());
read.build_read(call_id, vec![index].into());
self.builder.add_node(read);
ssa.write_variable(var, block, read_id);
}
// Read the "actual return" value and return it
let value_index = self.builder.builder.create_field_index(0);
let mut read = self.builder.allocate_node();
let read_id = read.id();
read.build_read(call_id, vec![value_index].into());
self.builder.add_node(read);
let result = if !has_inouts {
call_id
} else {
let value_index = self.builder.builder.create_field_index(0);
let mut read = self.builder.allocate_node();
let read_id = read.id();
read.build_read(call_id, vec![value_index].into());
self.builder.add_node(read);
read_id
};
(read_id, block)
(result, block)
}
Expr::Intrinsic {
id,
......
......@@ -28,6 +28,7 @@ for "for"
if "if"
inout "inout"
integer "integer"
in "in"
let "let"
match "match"
mod "mod"
......@@ -128,7 +129,7 @@ _ "_"
0x[0-9a-fA-F]+ "HEX_INT"
0b[0-1]+ "BIN_INT"
0o[0-7]+ "OCT_INT"
[0-9]+\.[0-9]*(|e[0-9]+) "FLOAT_LIT"
[0-9]+\.[0-9]+(|e[0-9]+) "FLOAT_LIT"
@[a-zA-Z0-9_]+ "LABEL"
. "UNMATCHED"
......
......@@ -99,13 +99,18 @@ TypeDef -> Result<TyDef, ()>
;
ObjFields -> Result<Vec<ObjField>, ()>
: { Ok(vec![]) }
| ObjFields ObjField { flatten($1, $2) }
: ObjFieldList { Ok($1?.into_iter().collect()) }
;
ObjFieldList -> Result<VecDeque<ObjField>, ()>
: { Ok(VecDeque::new()) }
| ObjField { Ok(VecDeque::from([$1?])) }
| ObjField ',' ObjFieldList { let mut lst = $3?; lst.push_front($1?); Ok(lst) }
| ObjField ';' ObjFieldList { let mut lst = $3?; lst.push_front($1?); Ok(lst) }
;
ObjField -> Result<ObjField, ()>
: PubOption 'ID' ';'
: PubOption 'ID'
{ Ok(ObjField{ span : $span, public : $1?, name : span_of_tok($2)?, typ : None }) }
| PubOption 'ID' ':' Type ';'
| PubOption 'ID' ':' Type
{ Ok(ObjField{ span : $span, public : $1?, name : span_of_tok($2)?, typ : Some($4?) }) }
;
......@@ -287,11 +292,17 @@ Stmt -> Result<Stmt, ()>
| 'match' NonStructExpr Cases
{ Ok(Stmt::MatchStmt{ span : $span, expr : $2?, body : $3? }) }
| 'for' VarBind '=' NonStructExpr 'to' NonStructExpr Stmts
{ Ok(Stmt::ForStmt{ span : $span, var : $2?, init : $4?, bound : $6?, step : None,
body : Box::new($7?) }) }
{ Ok(Stmt::ForStmt{ span : $span, var : $2?, init : $4?, bound : $6?,
inclusive: false, step : None, body : Box::new($7?) }) }
| 'for' VarBind '=' NonStructExpr 'to' NonStructExpr 'by' SignedIntLit Stmts
{ Ok(Stmt::ForStmt{ span : $span, var : $2?, init : $4?, bound : $6?, step : Some($8?),
body : Box::new($9?) }) }
{ Ok(Stmt::ForStmt{ span : $span, var : $2?, init : $4?, bound : $6?,
inclusive: false, step : Some($8?), body : Box::new($9?) }) }
| 'for' VarBind 'in' NonStructExpr '..' NonStructExpr Stmts
{ Ok(Stmt::ForStmt{ span: $span, var: $2?, init: $4?, bound: $6?,
inclusive: false, step: None, body: Box::new($7?) }) }
| 'for' VarBind 'in' NonStructExpr '..' '=' NonStructExpr Stmts
{ Ok(Stmt::ForStmt{ span: $span, var: $2?, init: $4?, bound: $7?,
inclusive: true, step: None, body: Box::new($8?) }) }
| 'while' NonStructExpr Stmts
{ Ok(Stmt::WhileStmt{ span : $span, cond : $2?, body : Box::new($3?) }) }
| 'return' ';'
......@@ -457,12 +468,16 @@ Expr -> Result<Expr, ()>
{ Ok(Expr::IntrinsicExpr{ span : $span, name : $1?, ty_args : Some($5?), args: $8? }) }
;
IdExprs -> Result<Vec<(Id, Expr)>, ()>
: 'ID' '=' Expr { Ok(vec![(span_of_tok($1)?, $3?)]) }
| IdExprsS ',' 'ID' '=' Expr { flatten($1, res_pair(span_of_tok($3), $5)) }
: IdExprList { Ok($1?.into_iter().collect()) }
;
IdExprList -> Result<VecDeque<(Id, Expr)>, ()>
: { Ok(VecDeque::new()) }
| IdExpr { Ok(VecDeque::from([$1?])) }
| IdExpr ',' IdExprList { let mut lst = $3?; lst.push_front($1?); Ok(lst) }
;
IdExprsS -> Result<Vec<(Id, Expr)>, ()>
: 'ID' '=' Expr { Ok(vec![(span_of_tok($1)?, $3?)]) }
| IdExprsS ',' 'ID' '=' Expr { flatten($1, res_pair(span_of_tok($3), $5)) }
IdExpr -> Result<(Id, Expr), ()>
: 'ID' ':' Expr { Ok((span_of_tok($1)?, $3?)) }
| 'ID' '=' Expr { Ok((span_of_tok($1)?, $3?)) }
;
Params -> Result<Vec<(bool, Expr)>, ()>
: { Ok(vec![]) }
......@@ -678,9 +693,9 @@ pub enum Stmt {
AssignStmt { span : Span, lhs : LExpr, assign : AssignOp, assign_span : Span, rhs : Expr },
IfStmt { span : Span, cond : Expr, thn : Box<Stmt>, els : Option<Box<Stmt>> },
MatchStmt { span : Span, expr : Expr, body : Vec<Case> },
// The step records: negative, number, base
ForStmt { span : Span, var : VarBind, init : Expr, bound : Expr, step : Option<(bool, Span, IntBase)>,
body : Box<Stmt> },
// The step records: negative, number, base; inclusive records whether the bound is included in the range
ForStmt { span : Span, var : VarBind, init : Expr, bound : Expr,
inclusive: bool, step : Option<(bool, Span, IntBase)>, body : Box<Stmt> },
WhileStmt { span : Span, cond : Expr, body : Box<Stmt> },
ReturnStmt { span : Span, expr : Option<Expr> },
BreakStmt { span : Span },
......
......@@ -808,8 +808,14 @@ fn analyze_program(
// Compute the proper type accounting for the inouts (which become returns)
let mut inout_types = inouts.iter().map(|e| e.get_type()).collect::<Vec<_>>();
let inout_tuple = types.new_tuple(inout_types);
let pure_return_type = types.new_tuple(vec![return_type, inout_tuple]);
let mut return_types = vec![return_type];
return_types.extend(inout_types);
// TODO: Ideally we would omit unit returns
let pure_return_type = if return_types.len() == 1 {
return_types.pop().unwrap()
} else {
types.new_tuple(return_types)
};
// Finally, we have a properly built environment and we can
// start processing the body
......@@ -1993,6 +1999,7 @@ fn process_stmt(
},
init,
bound,
inclusive,
step,
body,
} => {
......@@ -2124,10 +2131,19 @@ fn process_stmt(
val: bound_val,
};
// The condition of the loop is var < bound, unless the step is negative in which case
// it is var > bound
// There are four cases for the condition that we generate, though it always takes the
// form var OP bound:
// 1. The step is positive and the range is exclusive of the bound, OP = <
// 2. The step is positive and the range is inclusive of the bound, OP = <=
// 3. The step is negative and the range is exclusive of the bound, OP = >
// 4. The step is negative and the range is inclusive of the bound, OP = >=
let condition = Expr::BinaryExp {
op: if step_pos { BinaryOp::Lt } else { BinaryOp::Gt },
op: match (step_pos, inclusive) {
(true, false) => BinaryOp::Lt,
(true, true) => BinaryOp::Le,
(false, false) => BinaryOp::Gt,
(false, true) => BinaryOp::Ge,
},
lhs: Box::new(Expr::Variable {
var: var,
typ: var_type,
......@@ -4809,7 +4825,7 @@ fn process_expr(
};
// Now, process the arguments to ensure they have the type needed by this
// constructor
// function
let mut arg_vals: Vec<Either<Expr, usize>> = vec![];
let mut errors = LinkedList::new();
......@@ -5009,19 +5025,21 @@ fn process_expr(
}
fn generate_return(expr: Expr, inouts: &Vec<Expr>, types: &mut TypeSolver) -> Stmt {
let inout_types = inouts.iter().map(|e| e.get_type()).collect();
let inout_type = types.new_tuple(inout_types);
let inout_types = inouts.iter().map(|e| e.get_type()).collect::<Vec<_>>();
let inout_vals = Expr::Tuple {
vals: inouts.clone(),
typ: inout_type,
};
let mut return_types = vec![expr.get_type()];
return_types.extend(inout_types);
let expr_type = expr.get_type();
let mut return_vals = vec![expr];
return_vals.extend_from_slice(inouts);
let val = Expr::Tuple {
vals: vec![expr, inout_vals],
typ: types.new_tuple(vec![expr_type, inout_type]),
let val = if return_vals.len() == 1 {
return_vals.pop().unwrap()
} else {
Expr::Tuple {
vals: return_vals,
typ: types.new_tuple(return_types),
}
};
Stmt::ReturnStmt { expr: val }
......
# Rodinia Benchmarks
This directory contains several of the benchmarks from the [Rodinia Benchmark Suite](http://www.cs.virginia.edu/rodinia/doku.php) ported into Juno.
The implementations are based on those provided with Rodinia version 3.1.
[package]
name = "juno_backprop"
version = "0.1.0"
authors = ["Aaron Councilman <aaronjc4@illinois.edu>"]
edition = "2021"
[[bin]]
name = "juno_backprop"
path = "src/main.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
[build-dependencies]
juno_build = { path = "../../../juno_build" }
[dependencies]
juno_build = { path = "../../../juno_build" }
hercules_rt = { path = "../../../hercules_rt" }
async-std = "*"
clap = { version = "*", features = ["derive"] }
with_builtin_macros = "0.1.0"
nom = "*"
rand = "0.9.0"
use juno_build::JunoCompiler;
/// Build script entry point: compiles `backprop.jn` with the schedule that
/// matches the enabled backend (`gpu.sch` when the `cuda` feature is on,
/// `cpu.sch` otherwise).
fn main() {
    // `cfg!` is resolved at compile time, so exactly one schedule is selected,
    // just as with two `#[cfg]`-gated copies of the chain.
    let schedule = if cfg!(feature = "cuda") {
        "gpu.sch"
    } else {
        "cpu.sch"
    };

    JunoCompiler::new()
        .file_in_src("backprop.jn")
        .unwrap()
        .schedule_in_src(schedule)
        .unwrap()
        .build()
        .unwrap();
}
// Activation function used by layer_forward: returns 1 / (1 + exp(-x)).
fn squash(x: f32) -> f32 {
// Sigmoid
return 1.0 / (1.0 + exp!(-x));
}
// Forward pass through one layer.
//   vals:    n + 1 values, indexed 0..=n
//   weights: (n + 1) x (m + 1) array, indexed [k, j]
// Returns m + 1 values: element 0 is set to 1.0 and each element j in 1..=m is
// squash(sum over k in 0..=n of weights[k, j] * vals[k]).
fn layer_forward<n, m: usize>(vals: f32[n + 1], weights: f32[n + 1, m + 1]) -> f32[m + 1] {
let result : f32[m + 1];
result[0] = 1.0;
for j in 1..=m {
let sum = 0.0;
// Note that the sum starts at k = 0, so vals[0] contributes as well.
for k in 0..=n {
sum += weights[k, j] * vals[k];
}
result[j] = squash(sum);
}
return result;
}
// Computes the output-layer deltas and their accumulated magnitude.
// For each j in 1..=n: delta[j] = a * (1 - a) * (t - a), where a = actual[j]
// and t = target[j]. Index 0 of delta is never written by this function.
// Returns (sum of |delta[j]| over j in 1..=n, delta).
fn output_error<n: usize>(target: f32[n + 1], actual: f32[n + 1]) -> (f32, f32[n + 1]) {
let errsum = 0.0;
let delta : f32[n + 1];
for j in 1..=n {
let a = actual[j];
let t = target[j];
delta[j] = a * (1.0 - a) * (t - a);
errsum += abs!(delta[j]);
}
return (errsum, delta);
}
// Computes the hidden-layer deltas from the output-layer deltas.
// For each j in 1..=hidden_n:
//   sum      = sum over k in 1..=output_n of out_delta[k] * hidden_weights[j, k]
//   delta[j] = h * (1 - h) * sum, where h = hidden_vals[j]
// Index 0 of delta is never written by this function.
// Returns (sum of |delta[j]| over j in 1..=hidden_n, delta).
fn hidden_error<hidden_n, output_n: usize>(
out_delta: f32[output_n + 1],
hidden_weights: f32[hidden_n + 1, output_n + 1],
hidden_vals: f32[hidden_n + 1],
) -> (f32, f32[hidden_n + 1]) {
let errsum = 0.0;
let delta : f32[hidden_n + 1];
for j in 1..=hidden_n {
let h = hidden_vals[j];
let sum = 0.0;
// Unlike layer_forward, this inner sum starts at k = 1 and skips index 0.
for k in 1..=output_n {
sum += out_delta[k] * hidden_weights[j, k];
}
delta[j] = h * (1.0 - h) * sum;
errsum += abs!(delta[j]);
}
return (errsum, delta);
}
// Coefficients used by adjust_weights:
//   ETA scales the delta[j] * vals[k] term of each weight change.
//   MOMENTUM scales the previous weight change (prev_weights[k, j]).
const ETA : f32 = 0.3;
const MOMENTUM : f32 = 0.3;
// Applies one weight update to a layer.
// For each j in 1..=m and k in 0..=n:
//   new_dw = ETA * delta[j] * vals[k] + MOMENTUM * prev_weights[k, j]
//   weights[k, j]      is incremented by new_dw
//   prev_weights[k, j] is overwritten with new_dw
// Column j = 0 of both arrays is left untouched.
// Returns (weights, prev_weights) after the update.
fn adjust_weights<n, m: usize>(
delta: f32[m + 1],
vals: f32[n + 1],
weights: f32[n + 1, m + 1],
prev_weights: f32[n + 1, m + 1]
) -> (f32[n + 1, m + 1], f32[n + 1, m + 1]) {
for j in 1..=m {
for k in 0..=n {
let new_dw = ETA * delta[j] * vals[k] + MOMENTUM * prev_weights[k, j];
weights[k, j] += new_dw;
prev_weights[k, j] = new_dw;
}
}
return (weights, prev_weights);
}
// Entry point: one training step of a three-layer network
// (input -> hidden -> output).
//
// Steps, in order:
//   1. Forward pass through both layers (layer_forward).
//   2. Output-layer and hidden-layer deltas and error sums
//      (output_error, hidden_error).
//   3. Weight update for the hidden->output weights, then for the
//      input->hidden weights (adjust_weights).
//
// Returns (out_err, hid_err, x), where x is the sum of element [0, 0] of the
// four updated weight arrays.
// NOTE(review): the commented-out signature and return below suggest the
// updated arrays were meant to be returned directly; the third value looks
// like a stand-in that keeps the updated arrays in use - confirm.
#[entry]
fn backprop<input_n, hidden_n, output_n: usize>(
input_vals: f32[input_n + 1],
input_weights: f32[input_n + 1, hidden_n + 1],
hidden_weights: f32[hidden_n + 1, output_n + 1],
target: f32[output_n + 1],
input_prev_weights: f32[input_n + 1, hidden_n + 1],
hidden_prev_weights: f32[hidden_n + 1, output_n + 1],
//) -> (f32, f32,
// f32[input_n + 1, hidden_n + 1], f32[input_n + 1, hidden_n + 1],
// f32[hidden_n + 1, output_n + 1], f32[hidden_n + 1, output_n + 1]) {
) -> (f32, f32, f32) {
let hidden_vals = layer_forward::<input_n, hidden_n>(input_vals, input_weights);
let output_vals = layer_forward::<hidden_n, output_n>(hidden_vals, hidden_weights);
let (out_err, out_delta) = output_error::<output_n>(target, output_vals);
// hidden_error is given hidden_weights before the adjust_weights call below
// rebinds that name to the updated array.
let (hid_err, hid_delta) = hidden_error::<hidden_n, output_n>(out_delta, hidden_weights, hidden_vals);
let (hidden_weights, hidden_prev_weights)
= adjust_weights::<hidden_n, output_n>(out_delta, hidden_vals, hidden_weights, hidden_prev_weights);
let (input_weights, input_prev_weights)
= adjust_weights::<input_n, hidden_n>(hid_delta, input_vals, input_weights, input_prev_weights);
return (out_err, hid_err, input_weights[0, 0] + input_prev_weights[0, 0] + hidden_weights[0, 0] + hidden_prev_weights[0, 0]);
//return (input_weights, input_prev_weights, hidden_weights, hidden_prev_weights);
}
gvn(*);
dce(*);
phi-elim(*);
dce(*);
crc(*);
dce(*);
slf(*);
dce(*);
let auto = auto-outline(backprop);
cpu(auto.backprop);
inline(auto.backprop);
inline(auto.backprop);
delete-uncalled(*);
sroa[true](*);
dce(*);
float-collections(*);
reuse-products(*);
dce(*);
gcm(*);
gvn(*);
dce(*);
phi-elim(*);
dce(*);
crc(*);
dce(*);
slf(*);
dce(*);
let auto = auto-outline(backprop);
gpu(auto.backprop);
inline(auto.backprop);
inline(auto.backprop);
delete-uncalled(*);
sroa[true](*);
dce(*);
float-collections(*);
reuse-products(*);
dce(*);
gcm(*);
#![feature(concat_idents)]
juno_build::juno!("backprop");
mod rust_backprop;
use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox, HerculesMutBoxTo};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use clap::Parser;
// Command-line arguments for the backprop benchmark, parsed via clap's derive.
// Plain `//` comments are used deliberately: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
struct BackpropInputs {
// Used by the harness as the number of input units (`input_n`).
layer_size: usize,
}
// Runs the Juno-compiled `backprop` entry point once and collects its outputs.
//
// `input_vals` and `target` are wrapped in `HerculesImmBox`; the four weight
// slices are copied into owned `Vec`s and wrapped in `HerculesMutBox`.
//
// Returns `(out_err, hid_err, input_weights, hidden_weights,
// input_prev_weights, hidden_prev_weights)`, the same ordering that
// `rust_backprop::backprop` returns.
fn run_backprop(
input_n: u64,
hidden_n: u64,
output_n: u64,
input_vals: &[f32],
input_weights: &[f32],
hidden_weights: &[f32],
target: &[f32],
input_prev_weights: &[f32],
hidden_prev_weights: &[f32],
) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
let input_vals = HerculesImmBox::from(input_vals);
let target = HerculesImmBox::from(target);
// `.to_vec()` copies the caller's slices, so the mutable boxes own their data
// and the caller's buffers are not touched.
let mut input_weights = HerculesMutBox::from(input_weights.to_vec());
let mut hidden_weights = HerculesMutBox::from(hidden_weights.to_vec());
let mut input_prev_weights = HerculesMutBox::from(input_prev_weights.to_vec());
let mut hidden_prev_weights = HerculesMutBox::from(hidden_prev_weights.to_vec());
let mut runner = runner!(backprop);
// Block on the async run, then wrap the result in a `HerculesMutBox` so it
// can be viewed as a slice and copied into a `Vec`.
let res = HerculesMutBox::from(async_std::task::block_on(async {
runner
.run(
input_n,
hidden_n,
output_n,
input_vals.to(),
input_weights.to(),
hidden_weights.to(),
target.to(),
input_prev_weights.to(),
hidden_prev_weights.to(),
)
.await
}))
.as_slice()
.to_vec();
// The Juno entry point returns three f32 values; only the first two (the
// output-layer and hidden-layer error sums) are used here.
let out_err = res[0];
let hid_err = res[1];
// The weight boxes are read back after the run.
// NOTE(review): this relies on the run having updated the boxes in place -
// confirm against hercules_rt semantics.
(
out_err,
hid_err,
input_weights.as_slice().to_vec(),
hidden_weights.as_slice().to_vec(),
input_prev_weights.as_slice().to_vec(),
hidden_prev_weights.as_slice().to_vec(),
)
}
/// Returns `true` when `x` and `y` differ by strictly less than `1e-5`
/// (absolute difference). Any comparison involving NaN yields `false`.
fn compare_float(x: f32, y: f32) -> bool {
    const TOLERANCE: f32 = 1e-5;
    let gap = (x - y).abs();
    gap < TOLERANCE
}
/// Element-wise approximate equality of two slices: `true` only when both
/// have the same length and every pair of elements passes `compare_float`.
fn compare_floats(xs: &[f32], ys: &[f32]) -> bool {
    if xs.len() != ys.len() {
        return false;
    }
    for (&x, &y) in xs.iter().zip(ys) {
        if !compare_float(x, y) {
            return false;
        }
    }
    true
}
/// Runs one backprop training step through both the Juno-compiled kernel
/// (`run_backprop`) and the pure-Rust reference (`rust_backprop::backprop`)
/// on identical inputs, and panics if any of the six outputs disagree
/// (per `compare_float` / `compare_floats`).
///
/// The network has `args.layer_size` input units, 16 hidden units and 1
/// output unit. Weights are drawn from a fixed-seed RNG so runs are
/// reproducible.
///
/// # Panics
///
/// Panics with a descriptive message when an error value or weight array
/// produced by Juno does not match the Rust reference.
fn backprop_harness(args: BackpropInputs) {
    let BackpropInputs { layer_size } = args;

    // Fixed seed: both implementations must see the same weights, and a
    // failing run must be reproducible.
    let mut rng = StdRng::seed_from_u64(7);

    let input_n = layer_size;
    let hidden_n = 16;
    let output_n = 1;

    // Every array has one extra slot at index 0; for the inputs it is set to 1.0.
    let mut input_vals = vec![0.0; input_n + 1];
    input_vals[0] = 1.0;

    // For some reason the bpnn_randomize_row function used on target just sets it to 0.1
    let target = vec![0.1; output_n + 1];

    // NOTE: the order of these two draws determines the generated weights; do
    // not reorder them.
    let input_weights = (0..(input_n + 1) * (hidden_n + 1))
        .map(|_| rng.random::<f32>())
        .collect::<Vec<_>>();
    let hidden_weights = (0..(hidden_n + 1) * (output_n + 1))
        .map(|_| rng.random::<f32>())
        .collect::<Vec<_>>();

    let input_prev_weights = vec![0.0; (input_n + 1) * (hidden_n + 1)];
    let hidden_prev_weights = vec![0.0; (hidden_n + 1) * (output_n + 1)];

    // The Juno run borrows the inputs; the Rust reference below consumes them.
    let (
        juno_out_err,
        juno_hid_err,
        juno_input_weights,
        juno_hidden_weights,
        juno_input_prev_weights,
        juno_hidden_prev_weights,
    ) = run_backprop(
        input_n as u64,
        hidden_n as u64,
        output_n as u64,
        &input_vals,
        &input_weights,
        &hidden_weights,
        &target,
        &input_prev_weights,
        &hidden_prev_weights,
    );

    let (
        rust_out_err,
        rust_hid_err,
        rust_input_weights,
        rust_hidden_weights,
        rust_input_prev_weights,
        rust_hidden_prev_weights,
    ) = rust_backprop::backprop(
        input_n,
        hidden_n,
        output_n,
        &input_vals,
        input_weights,
        hidden_weights,
        &target,
        input_prev_weights,
        hidden_prev_weights,
    );

    // Scalar checks report both values so a failure can be diagnosed from the
    // panic message alone.
    assert!(
        compare_float(juno_out_err, rust_out_err),
        "Output error does not match after training (juno = {}, rust = {})",
        juno_out_err,
        rust_out_err
    );
    assert!(
        compare_float(juno_hid_err, rust_hid_err),
        "Hidden error does not match after training (juno = {}, rust = {})",
        juno_hid_err,
        rust_hid_err
    );
    assert!(
        compare_floats(&juno_input_weights, &rust_input_weights),
        "Input weights do not match after training"
    );
    assert!(
        compare_floats(&juno_hidden_weights, &rust_hidden_weights),
        "Hidden weights do not match after training"
    );
    assert!(
        compare_floats(&juno_input_prev_weights, &rust_input_prev_weights),
        "Input prev_weights do not match after training"
    );
    assert!(
        compare_floats(&juno_hidden_prev_weights, &rust_hidden_prev_weights),
        "Hidden prev_weights do not match after training"
    );
}
/// Parses the command-line arguments and runs the comparison harness.
fn main() {
    backprop_harness(BackpropInputs::parse());
}
/// Runs the Juno-vs-Rust comparison with 65536 input units.
#[test]
fn backprop_test() {
    let args = BackpropInputs { layer_size: 65536 };
    backprop_harness(args);
}
/// Reference forward pass through one layer.
///
/// `vals` holds `n + 1` values and `weights` is a row-major
/// `(n + 1) x (m + 1)` matrix, so element `[k, j]` lives at
/// `k * (m + 1) + j`.
///
/// Returns `m + 1` values: element 0 is `1.0`, and element `j` (for `j` in
/// `1..=m`) is the sigmoid of `sum over k in 0..=n of weights[k, j] * vals[k]`.
///
/// # Panics
///
/// Panics if `m >= 1` and `vals` or `weights` is too short for the indices used.
fn layer_forward(n: usize, m: usize, vals: &[f32], weights: &[f32]) -> Vec<f32> {
    let stride = m + 1;
    let mut activations: Vec<f32> = Vec::with_capacity(stride);
    activations.push(1.0);
    activations.extend((1..=m).map(|unit| {
        // Explicit fold from 0.0 keeps the accumulation order of a plain loop.
        let weighted = (0..=n).fold(0.0_f32, |acc, src| {
            acc + weights[src * stride + unit] * vals[src]
        });
        1.0 / (1.0 + (-weighted).exp())
    }));
    activations
}
/// Reference output-layer error.
///
/// For each `j` in `1..=n` computes `o * (1 - o) * (t - o)` with
/// `o = actual[j]` and `t = target[j]`. Slot 0 of the result stays `0.0`.
///
/// Returns `(sum of absolute deltas, deltas)` where `deltas` has `n + 1`
/// elements.
///
/// # Panics
///
/// Panics if `n >= 1` and `actual` or `target` has fewer than `n + 1` elements.
fn output_error(n: usize, target: &[f32], actual: &[f32]) -> (f32, Vec<f32>) {
    let mut deltas: Vec<f32> = Vec::with_capacity(n + 1);
    deltas.push(0.0);
    let mut total_abs = 0.0_f32;
    for idx in 1..=n {
        let (out, want) = (actual[idx], target[idx]);
        let d = out * (1.0 - out) * (want - out);
        total_abs += d.abs();
        deltas.push(d);
    }
    (total_abs, deltas)
}
/// Reference hidden-layer error.
///
/// `weights` is a row-major `(n + 1) x (m + 1)` matrix. For each `j` in
/// `1..=n` the delta is `h * (1 - h) * sum`, where `h = actual[j]` and
/// `sum` is `delta[k] * weights[j, k]` summed over `k` in `1..=m`.
/// Slot 0 of the result stays `0.0`.
///
/// Returns `(sum of absolute deltas, deltas)` where `deltas` has `n + 1`
/// elements.
///
/// # Panics
///
/// Panics if `n >= 1` and any slice is too short for the indices used.
fn hidden_error(
    n: usize,
    m: usize,
    delta: &[f32],
    weights: &[f32],
    actual: &[f32],
) -> (f32, Vec<f32>) {
    let stride = m + 1;
    let grads: Vec<f32> = std::iter::once(0.0_f32)
        .chain((1..=n).map(|unit| {
            let h = actual[unit];
            // Explicit fold from 0.0 keeps the accumulation order of a plain loop.
            let back = (1..=m).fold(0.0_f32, |acc, out| {
                acc + delta[out] * weights[unit * stride + out]
            });
            h * (1.0 - h) * back
        }))
        .collect();
    let total_abs = grads[1..].iter().fold(0.0_f32, |acc, g| acc + g.abs());
    (total_abs, grads)
}
/// Reference weight update for one layer.
///
/// `weights` and `prev_weights` are row-major `(n + 1) x (m + 1)` matrices.
/// For each `j` in `1..=m` and `k` in `0..=n`:
///
/// * `new_dw = 0.3 * delta[j] * vals[k] + 0.3 * prev_weights[k, j]`
/// * `weights[k, j] += new_dw`
/// * `prev_weights[k, j] = new_dw`
///
/// Column 0 of both matrices is left untouched. Returns the updated
/// `(weights, prev_weights)`.
///
/// # Panics
///
/// Panics if `m >= 1` and any slice or vector is too short for the indices used.
fn adjust_weights(
    n: usize,
    m: usize,
    delta: &[f32],
    vals: &[f32],
    mut weights: Vec<f32>,
    mut prev_weights: Vec<f32>,
) -> (Vec<f32>, Vec<f32>) {
    // Same values as the ETA / MOMENTUM constants in the Juno source.
    const ETA: f32 = 0.3;
    const MOMENTUM: f32 = 0.3;

    let stride = m + 1;
    for j in 1..=m {
        // `ETA * delta[j] * vals[k]` is evaluated left to right, so hoisting
        // the first product does not change the result.
        let scaled_delta = ETA * delta[j];
        for k in 0..=n {
            let cell = k * stride + j;
            let step = (scaled_delta * vals[k]) + (MOMENTUM * prev_weights[cell]);
            weights[cell] += step;
            prev_weights[cell] = step;
        }
    }
    (weights, prev_weights)
}
/// Pure-Rust reference implementation of one backprop training step for a
/// three-layer network (input -> hidden -> output).
///
/// Performs the forward pass, computes the output- and hidden-layer errors,
/// then updates the hidden->output weights followed by the input->hidden
/// weights.
///
/// Returns `(out_err, hid_err, input_weights, hidden_weights,
/// input_prev_weights, hidden_prev_weights)` with the weight vectors holding
/// their post-update values.
pub fn backprop(
    input_n: usize,
    hidden_n: usize,
    output_n: usize,
    input_vals: &[f32],
    input_weights: Vec<f32>,
    hidden_weights: Vec<f32>,
    target: &[f32],
    input_prev_weights: Vec<f32>,
    hidden_prev_weights: Vec<f32>,
) -> (f32, f32, Vec<f32>, Vec<f32>, Vec<f32>, Vec<f32>) {
    // Forward pass.
    let hidden_acts = layer_forward(input_n, hidden_n, input_vals, &input_weights);
    let output_acts = layer_forward(hidden_n, output_n, &hidden_acts, &hidden_weights);

    // Error terms. `hidden_error` must see the hidden weights *before* they
    // are adjusted below.
    let (out_err, out_delta) = output_error(output_n, target, &output_acts);
    let (hid_err, hid_delta) =
        hidden_error(hidden_n, output_n, &out_delta, &hidden_weights, &hidden_acts);

    // Weight updates (each call consumes the old vectors).
    let (new_hidden_weights, new_hidden_prev) = adjust_weights(
        hidden_n,
        output_n,
        &out_delta,
        &hidden_acts,
        hidden_weights,
        hidden_prev_weights,
    );
    let (new_input_weights, new_input_prev) = adjust_weights(
        input_n,
        hidden_n,
        &hid_delta,
        input_vals,
        input_weights,
        input_prev_weights,
    );

    (
        out_err,
        hid_err,
        new_input_weights,
        new_hidden_weights,
        new_input_prev,
        new_hidden_prev,
    )
}