Skip to content
Snippets Groups Projects
Commit 9131736e authored by rarbore2's avatar rarbore2
Browse files

Merge branch 'hercules_box' into 'main'

Hercules Box

See merge request !100
parents 2a50b2b6 387098dc
No related branches found
No related tags found
1 merge request!100Hercules Box
Pipeline #200941 passed
Showing
with 302 additions and 281 deletions
......@@ -395,6 +395,7 @@ version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"rand",
"with_builtin_macros",
......@@ -651,6 +652,10 @@ dependencies = [
"take_mut",
]
[[package]]
name = "hercules_rt"
version = "0.1.0"
[[package]]
name = "hermit-abi"
version = "0.4.0"
......@@ -702,6 +707,7 @@ name = "juno_antideps"
version = "0.1.0"
dependencies = [
"async-std",
"hercules_rt",
"juno_build",
"with_builtin_macros",
]
......@@ -746,6 +752,7 @@ name = "juno_implicit_clone"
version = "0.1.0"
dependencies = [
"async-std",
"hercules_rt",
"juno_build",
"with_builtin_macros",
]
......@@ -755,6 +762,7 @@ name = "juno_matmul"
version = "0.1.0"
dependencies = [
"async-std",
"hercules_rt",
"juno_build",
"rand",
"with_builtin_macros",
......@@ -765,6 +773,7 @@ name = "juno_nested_ccp"
version = "0.1.0"
dependencies = [
"async-std",
"hercules_rt",
"juno_build",
"with_builtin_macros",
]
......@@ -784,6 +793,7 @@ name = "juno_simple3"
version = "0.1.0"
dependencies = [
"async-std",
"hercules_rt",
"juno_build",
"with_builtin_macros",
]
......@@ -905,6 +915,7 @@ version = "0.1.0"
dependencies = [
"async-std",
"clap",
"hercules_rt",
"juno_build",
"rand",
"with_builtin_macros",
......
......@@ -4,6 +4,7 @@ members = [
"hercules_cg",
"hercules_ir",
"hercules_opt",
"hercules_rt",
"hercules_tools/hercules_driver",
......
......@@ -58,7 +58,7 @@ impl<'a> RTContext<'a> {
// Dump the function signature.
write!(
w,
"#[allow(unused_variables,unused_mut)]\nasync fn {}(",
"#[allow(unused_variables,unused_mut,unused_parens)]\nasync fn {}<'a>(",
func.name
)?;
let mut first_param = true;
......@@ -81,75 +81,29 @@ impl<'a> RTContext<'a> {
if !self.module.types[func.param_types[idx].idx()].is_primitive() {
write!(w, "mut ")?;
}
write!(
w,
"p_i{}: {}",
idx,
self.get_type_interface(func.param_types[idx])
)?;
}
write!(w, ") -> {} {{\n", self.get_type_interface(func.return_type))?;
// Copy the "interface" parameters to "non-interface" parameters.
// The purpose of this is to convert collection objects from a Box<[u8]>
// type to a *mut u8 type. This name copying is done so that we can
// easily construct objects just after this by moving the "interface"
// parameters.
for (idx, ty) in func.param_types.iter().enumerate() {
if self.module.types[ty.idx()].is_primitive() {
write!(w, " let p{} = p_i{};\n", idx, idx)?;
} else {
write!(
w,
" let p{} = ::std::boxed::Box::as_mut_ptr(&mut p_i{}) as *mut u8;\n",
idx, idx
)?;
}
write!(w, "p{}: {}", idx, self.get_type(func.param_types[idx]))?;
}
write!(w, ") -> {} {{\n", self.get_type(func.return_type))?;
// Collect the boxes representing ownership over collection objects for
// this function. The actual emitted computation is done entirely using
// pointers, so these get emitted to hold onto ownership over the
// underlying memory and to automatically clean them up when this
// function returns. Collection objects are inside Options, since their
// ownership may get passed to other called RT functions. If this
// function returns a collection object, then at the very end, right
// before the return, the to-be-returned pointer is compared against the
// owned collection objects - it should match exactly one of those
// objects, and that box is what's actually returned.
let mem_obj_ty = "::core::option::Option<::std::boxed::Box<[u8]>>";
// Allocate collection constants.
for object in self.collection_objects[&self.func_id].iter_objects() {
match self.collection_objects[&self.func_id].origin(object) {
CollectionObjectOrigin::Parameter(index) => write!(
w,
" let mut obj{}: {} = Some(p_i{});\n",
object.idx(),
mem_obj_ty,
index
)?,
CollectionObjectOrigin::Constant(id) => {
let size = self.codegen_type_size(self.typing[id.idx()]);
write!(
w,
" let mut obj{}: {} = Some((0..{}).map(|_| 0u8).collect());\n",
object.idx(),
mem_obj_ty,
size
)?
}
CollectionObjectOrigin::Call(_) | CollectionObjectOrigin::Undef(_) => write!(
if let CollectionObjectOrigin::Constant(id) =
self.collection_objects[&self.func_id].origin(object)
{
let size = self.codegen_type_size(self.typing[id.idx()]);
write!(
w,
" let mut obj{}: {} = None;\n",
" let mut obj{}: ::hercules_rt::HerculesBox = unsafe {{ ::hercules_rt::HerculesBox::__zeros({}) }};\n",
object.idx(),
mem_obj_ty,
)?,
size
)?
}
}
// Dump signatures for called CPU functions.
// Dump signatures for called device functions.
write!(w, " extern \"C\" {{\n")?;
for callee in self.callgraph.get_callees(self.func_id) {
if self.devices[callee.idx()] != Device::LLVM {
if self.devices[callee.idx()] == Device::AsyncRust {
continue;
}
let callee = &self.module.functions[callee.idx()];
......@@ -169,9 +123,9 @@ impl<'a> RTContext<'a> {
} else {
write!(w, ", ")?;
}
write!(w, "p{}: {}", idx, self.get_type(*ty))?;
write!(w, "p{}: {}", idx, self.device_get_type(*ty))?;
}
write!(w, ") -> {};\n", self.get_type(callee.return_type))?;
write!(w, ") -> {};\n", self.device_get_type(callee.return_type))?;
}
write!(w, " }}\n")?;
......@@ -190,7 +144,7 @@ impl<'a> RTContext<'a> {
} else if self.module.types[self.typing[idx].idx()].is_float() {
"0.0"
} else {
"::core::ptr::null::<u8>() as _"
"unsafe { ::hercules_rt::HerculesBox::__null() }"
}
)?;
}
......@@ -281,20 +235,7 @@ impl<'a> RTContext<'a> {
}
Node::Return { control: _, data } => {
let block = &mut blocks.get_mut(&id).unwrap();
let objects = self.collection_objects[&self.func_id].objects(data);
if objects.is_empty() {
write!(block, " return {};\n", self.get_value(data))?
} else {
// If the value to return is a collection object, figure out
// which object it actually is at runtime and return that
// box.
for object in objects {
write!(block, " if let Some(mut obj) = obj{} && ::std::boxed::Box::as_mut_ptr(&mut obj) as *mut u8 == {} {{\n", object.idx(), self.get_value(data))?;
write!(block, " return obj;\n")?;
write!(block, " }}\n")?;
}
write!(block, " panic!(\"HERCULES PANIC: Pointer to be returned doesn't match any known collection objects.\");\n")?
}
write!(block, " return {};\n", self.get_value(data))?
}
_ => panic!("PANIC: Can't lower {:?}.", func.nodes[id.idx()]),
}
......@@ -313,12 +254,21 @@ impl<'a> RTContext<'a> {
match func.nodes[id.idx()] {
Node::Parameter { index } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
write!(
block,
" {} = p{};\n",
self.get_value(id),
index
)?
if self.module.types[self.typing[id.idx()].idx()].is_primitive() {
write!(
block,
" {} = p{};\n",
self.get_value(id),
index
)?
} else {
write!(
block,
" {} = unsafe {{ p{}.__take() }};\n",
self.get_value(id),
index
)?
}
}
Node::Constant { id: cons_id } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
......@@ -339,11 +289,7 @@ impl<'a> RTContext<'a> {
let objects = self.collection_objects[&self.func_id].objects(id);
assert_eq!(objects.len(), 1);
let object = objects[0];
write!(
block,
"::std::boxed::Box::as_mut_ptr(obj{}.as_mut().unwrap()) as *mut u8",
object.idx()
)?
write!(block, "unsafe {{ obj{}.__take() }}", object.idx())?
}
}
write!(block, ";\n")?
......@@ -357,83 +303,86 @@ impl<'a> RTContext<'a> {
match self.devices[callee_id.idx()] {
Device::LLVM => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
// First, get the raw pointers to collections that the
// device function takes as input.
let callee_objs = &self.collection_objects[&callee_id];
for (idx, arg) in args.into_iter().enumerate() {
if let Some(obj) = callee_objs.param_to_object(idx) {
// Extract a raw pointer from the HerculesBox.
if callee_objs.is_mutated(obj) {
write!(
block,
" let arg_tmp{} = unsafe {{ {}.__cpu_ptr_mut() }};\n",
idx,
self.get_value(*arg)
)?;
} else {
write!(
block,
" let arg_tmp{} = unsafe {{ {}.__cpu_ptr() }};\n",
idx,
self.get_value(*arg)
)?;
}
} else {
write!(
block,
" let arg_tmp{} = {};\n",
idx,
self.get_value(*arg)
)?;
}
}
// Emit the call.
write!(
block,
" {} = unsafe {{ {}(",
self.get_value(id),
" let call_tmp = unsafe {{ {}(",
self.module.functions[callee_id.idx()].name
)?;
for dc in dynamic_constants {
self.codegen_dynamic_constant(*dc, block)?;
write!(block, ", ")?;
}
for arg in args {
write!(block, "{}, ", self.get_value(*arg))?;
for idx in 0..args.len() {
write!(block, "arg_tmp{}, ", idx)?;
}
write!(block, ") }};\n")?;
// When a CPU function is called that returns a
// When a device function is called that returns a
// collection object, that object must have come from
// one of its parameters. Dynamically figure out which
// one it came from, so that we can move it to the slot
// of the output object.
let call_objects = self.collection_objects[&self.func_id].objects(id);
if !call_objects.is_empty() {
assert_eq!(call_objects.len(), 1);
let call_object = call_objects[0];
let callee_returned_objects =
self.collection_objects[&callee_id].returned_objects();
let possible_params: Vec<_> =
(0..self.module.functions[callee_id.idx()].param_types.len())
.filter(|idx| {
let object_of_param = self.collection_objects[&callee_id]
.param_to_object(*idx);
// Look at parameters that could be the
// source of the memory object returned
// by the function.
object_of_param
.map(|object_of_param| {
callee_returned_objects.contains(&object_of_param)
})
.unwrap_or(false)
})
.collect();
let arg_objects = args
.into_iter()
.enumerate()
.filter(|(idx, _)| possible_params.contains(idx))
.map(|(_, arg)| {
self.collection_objects[&self.func_id]
.objects(*arg)
.into_iter()
})
.flatten();
// Dynamically check which of the memory objects
// corresponding to arguments to the call was
// returned by the call. Move that memory object
// into the memory object of the call.
let mut first_obj = true;
for arg_object in arg_objects {
write!(block, " ")?;
if first_obj {
first_obj = false;
} else {
write!(block, "else ")?;
let caller_objects = self.collection_objects[&self.func_id].objects(id);
if !caller_objects.is_empty() {
for (idx, arg) in args.into_iter().enumerate() {
if idx != 0 {
write!(block, " else\n")?;
}
write!(block, "if let Some(obj) = obj{}.as_mut() && ::std::boxed::Box::as_mut_ptr(obj) as *mut u8 == {} {{\n", arg_object.idx(), self.get_value(id))?;
write!(
block,
" obj{} = obj{}.take();\n",
call_object.idx(),
arg_object.idx()
" if call_tmp == arg_tmp{} {{\n",
idx
)?;
write!(
block,
" {} = unsafe {{ {}.__take() }};\n",
self.get_value(id),
self.get_value(*arg)
)?;
write!(block, " }}\n")?;
write!(block, " }}")?;
}
write!(block, " else {{\n")?;
write!(block, " panic!(\"HERCULES PANIC: Pointer returned from called function doesn't match any known collection objects.\");\n")?;
write!(block, " panic!(\"HERCULES PANIC: Pointer returned from device function doesn't match an argument pointer.\");\n")?;
write!(block, " }}\n")?;
} else {
write!(
block,
" {} = call_tmp;\n",
self.get_value(id)
)?;
}
}
Device::AsyncRust => {
......@@ -452,7 +401,7 @@ impl<'a> RTContext<'a> {
if self.module.types[self.typing[arg.idx()].idx()].is_primitive() {
write!(block, "{}, ", self.get_value(*arg))?;
} else {
write!(block, "{}.take(), ", self.get_value(*arg))?;
write!(block, "unsafe {{ {}.__take() }}, ", self.get_value(*arg))?;
}
}
write!(block, ").await;\n")?;
......@@ -603,8 +552,8 @@ impl<'a> RTContext<'a> {
convert_type(&self.module.types[id.idx()])
}
fn get_type_interface(&self, id: TypeID) -> &'static str {
convert_type_interface(&self.module.types[id.idx()])
fn device_get_type(&self, id: TypeID) -> &'static str {
device_convert_type(&self.module.types[id.idx()])
}
}
......@@ -621,18 +570,27 @@ fn convert_type(ty: &Type) -> &'static str {
Type::UnsignedInteger64 => "u64",
Type::Float32 => "f32",
Type::Float64 => "f64",
Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => "*mut u8",
Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => {
"::hercules_rt::HerculesBox<'a>"
}
_ => panic!(),
}
}
/*
* Collection types are passed to / returned from runtime functions through a
* wrapper type for ownership tracking reasons.
*/
fn convert_type_interface(ty: &Type) -> &'static str {
fn device_convert_type(ty: &Type) -> &'static str {
match ty {
Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => "Box<[u8]>",
_ => convert_type(ty),
Type::Boolean => "bool",
Type::Integer8 => "i8",
Type::Integer16 => "i16",
Type::Integer32 => "i32",
Type::Integer64 => "i64",
Type::UnsignedInteger8 => "u8",
Type::UnsignedInteger16 => "u16",
Type::UnsignedInteger32 => "u32",
Type::UnsignedInteger64 => "u64",
Type::Float32 => "f32",
Type::Float64 => "f64",
Type::Product(_) | Type::Summation(_) | Type::Array(_, _) => "*mut u8",
_ => panic!(),
}
}
[package]
name = "hercules_rt"
version = "0.1.0"
authors = ["Russel Arbore <rarbore2@illinois.edu>"]
[dependencies]
use std::alloc::{alloc, alloc_zeroed, dealloc, Layout};
use std::marker::PhantomData;
use std::mem::swap;
use std::ptr::{copy_nonoverlapping, NonNull};
use std::slice::from_raw_parts;
/// An in-memory collection object that can be used by functions compiled by the
/// Hercules compiler.
///
/// A box refers to its backing bytes through at most one of three pointers:
/// memory borrowed immutably from the caller, memory borrowed mutably from the
/// caller, or a heap buffer the box allocated itself. A box with no pointer at
/// all is "null" (see [`HerculesBox::__null`]).
pub struct HerculesBox<'a> {
    /// Caller memory borrowed read-only via [`HerculesBox::from_slice`].
    cpu_shared: Option<NonNull<u8>>,
    /// Caller memory borrowed mutably via [`HerculesBox::from_slice_mut`].
    cpu_exclusive: Option<NonNull<u8>>,
    /// Heap buffer allocated by the box itself; released in `Drop`.
    /// Invariant: whenever this is `Some`, `size` is non-zero.
    cpu_owned: Option<NonNull<u8>>,
    /// Size of the backing memory in bytes.
    size: usize,
    /// Ties the box to the lifetime of any borrowed slice.
    _phantom: PhantomData<&'a u8>,
}

impl<'a> HerculesBox<'a> {
    /// Alignment, in bytes, of every buffer the box allocates for itself.
    const ALIGN: usize = 16;

    /// Wraps an immutably borrowed slice without copying it.
    ///
    /// The bytes are only copied if mutable access is requested later (see
    /// [`HerculesBox::__cpu_ptr_mut`]).
    pub fn from_slice<T>(slice: &'a [T]) -> Self {
        HerculesBox {
            // A reference is never null, so the safe conversion suffices.
            cpu_shared: Some(NonNull::from(slice).cast::<u8>()),
            cpu_exclusive: None,
            cpu_owned: None,
            size: std::mem::size_of_val(slice),
            _phantom: PhantomData,
        }
    }

    /// Wraps a mutably borrowed slice without copying it. Mutable access writes
    /// directly into the caller's memory.
    pub fn from_slice_mut<T>(slice: &'a mut [T]) -> Self {
        // Measure before the mutable reference is moved into the pointer.
        let size = std::mem::size_of_val(&*slice);
        HerculesBox {
            cpu_shared: None,
            cpu_exclusive: Some(NonNull::from(slice).cast::<u8>()),
            cpu_owned: None,
            size,
            _phantom: PhantomData,
        }
    }

    /// Views the backing bytes as a slice of `T`.
    ///
    /// # Panics
    ///
    /// Panics if `T` is zero-sized, if the byte size is not a multiple of
    /// `size_of::<T>()`, if the box is null, or if the backing memory is not
    /// suitably aligned for `T`.
    pub fn as_slice<T>(&'a self) -> &'a [T] {
        let elem_size = std::mem::size_of::<T>();
        assert_ne!(
            elem_size, 0,
            "HERCULES PANIC: Can't view a HerculesBox as a slice of a zero-sized type."
        );
        assert_eq!(self.size % elem_size, 0);
        // SAFETY: `__cpu_ptr` only reads the pointer fields (and panics on a
        // null box); nothing is written through the returned pointer here.
        let ptr = unsafe { self.__cpu_ptr() } as *const T;
        // Borrowed memory carries the alignment of its original element type,
        // which may be weaker than what `T` requires.
        assert_eq!(
            ptr as usize % std::mem::align_of::<T>(),
            0,
            "HERCULES PANIC: HerculesBox memory is not aligned for the requested element type."
        );
        // SAFETY: `ptr` is non-null and aligned (checked above) and refers to
        // `self.size` bytes that stay readable while `self` is borrowed.
        unsafe { from_raw_parts(ptr, self.size / elem_size) }
    }

    /// Returns whichever pointer currently backs the box, for reading.
    ///
    /// Panics if the box is null.
    unsafe fn into_cpu(&self) -> NonNull<u8> {
        self.cpu_shared
            .or(self.cpu_exclusive)
            .or(self.cpu_owned)
            .expect("HERCULES PANIC: Tried to access the memory of a null HerculesBox.")
    }

    /// Returns a pointer that may be written through.
    ///
    /// If the box only holds shared (read-only) memory, the bytes are first
    /// copied into a freshly allocated buffer owned by the box, and the box
    /// switches to that buffer. Panics if the box is null.
    unsafe fn into_cpu_mut(&mut self) -> NonNull<u8> {
        if let Some(ptr) = self.cpu_exclusive.or(self.cpu_owned) {
            return ptr;
        }
        // Resolve the copy source *before* allocating, so a null box panics
        // without ever calling the allocator.
        let shared = self
            .cpu_shared
            .expect("HERCULES PANIC: Tried to access the memory of a null HerculesBox.");
        if self.size == 0 {
            // Zero-size allocations are not permitted by the global allocator,
            // and no byte can be written through a zero-length region, so the
            // borrowed pointer can be handed out as-is.
            return shared;
        }
        let layout = Layout::from_size_align(self.size, Self::ALIGN)
            .expect("HERCULES PANIC: HerculesBox size is too large to allocate.");
        // SAFETY: `layout` has a non-zero size (checked above).
        let owned = NonNull::new(unsafe { alloc(layout) })
            .expect("HERCULES PANIC: Failed to allocate memory for a HerculesBox.");
        // SAFETY: `shared` is readable for `self.size` bytes, `owned` was just
        // allocated with that size, and the two regions cannot overlap.
        unsafe { copy_nonoverlapping(shared.as_ptr(), owned.as_ptr(), self.size) };
        self.cpu_owned = Some(owned);
        self.cpu_shared = None;
        owned
    }

    /// Creates a box owning `size` zero-initialized bytes.
    ///
    /// # Panics
    ///
    /// Panics if `size` is zero, does not fit the address space, or the
    /// allocation fails.
    ///
    /// # Safety
    ///
    /// NOTE(review): this function imposes no pointer invariants on the
    /// caller; it appears to be `unsafe` because it belongs to the `__`
    /// interface emitted by the compiler's generated code.
    pub unsafe fn __zeros(size: u64) -> Self {
        assert_ne!(size, 0);
        let size = <usize as std::convert::TryFrom<u64>>::try_from(size)
            .expect("HERCULES PANIC: HerculesBox size doesn't fit in usize.");
        let layout = Layout::from_size_align(size, Self::ALIGN)
            .expect("HERCULES PANIC: HerculesBox size is too large to allocate.");
        // SAFETY: `layout` has a non-zero size (asserted above).
        let ptr = NonNull::new(unsafe { alloc_zeroed(layout) })
            .expect("HERCULES PANIC: Failed to allocate memory for a HerculesBox.");
        HerculesBox {
            cpu_shared: None,
            cpu_exclusive: None,
            cpu_owned: Some(ptr),
            size,
            _phantom: PhantomData,
        }
    }

    /// Creates a null box: no backing memory and a size of zero.
    ///
    /// # Safety
    ///
    /// Requesting the memory of a null box (`as_slice`, `__cpu_ptr`,
    /// `__cpu_ptr_mut`) panics.
    pub unsafe fn __null() -> Self {
        HerculesBox {
            cpu_shared: None,
            cpu_exclusive: None,
            cpu_owned: None,
            size: 0,
            _phantom: PhantomData,
        }
    }

    /// Moves the contents out of `self`, leaving a null box behind.
    ///
    /// # Safety
    ///
    /// After this call `self` is null; see [`HerculesBox::__null`].
    pub unsafe fn __take(&mut self) -> Self {
        // SAFETY: constructing a null box has no preconditions.
        let mut ret = unsafe { Self::__null() };
        swap(&mut ret, self);
        ret
    }

    /// Returns a raw pointer to the backing bytes for reading.
    ///
    /// # Safety
    ///
    /// The pointer is typed `*mut u8` but may refer to memory that was borrowed
    /// immutably; the caller must not write through it. It must not be used
    /// after the box is dropped or after `__cpu_ptr_mut` moves the contents to
    /// a new buffer.
    pub unsafe fn __cpu_ptr(&self) -> *mut u8 {
        // SAFETY: only reads the pointer fields of `self`.
        unsafe { self.into_cpu() }.as_ptr()
    }

    /// Returns a raw pointer to the backing bytes for reading and writing,
    /// copying shared memory into a buffer owned by the box first if needed.
    ///
    /// # Safety
    ///
    /// The pointer must not be used after the box is dropped, and at most
    /// `size` bytes may be accessed through it.
    pub unsafe fn __cpu_ptr_mut(&mut self) -> *mut u8 {
        // SAFETY: `self` is exclusively borrowed for the duration of the call.
        unsafe { self.into_cpu_mut() }.as_ptr()
    }
}

impl<'a> Drop for HerculesBox<'a> {
    /// Frees the buffer allocated by the box, if any. Borrowed memory is left
    /// untouched.
    fn drop(&mut self) {
        if let Some(ptr) = self.cpu_owned {
            // SAFETY: `cpu_owned` is only ever set to a pointer obtained from
            // the global allocator with exactly this (non-zero) size and
            // alignment, so the layout is the valid one it was allocated with.
            unsafe {
                dealloc(
                    ptr.as_ptr(),
                    Layout::from_size_align_unchecked(self.size, Self::ALIGN),
                )
            }
        }
    }
}
......@@ -10,6 +10,7 @@ juno_build = { path = "../../juno_build" }
[dependencies]
clap = { version = "*", features = ["derive"] }
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
rand = "*"
async-std = "*"
with_builtin_macros = "0.1.0"
#![feature(box_as_ptr, let_chains)]
extern crate async_std;
extern crate hercules_rt;
extern crate juno_build;
use core::ptr::copy_nonoverlapping;
use hercules_rt::HerculesBox;
juno_build::juno!("dot");
fn main() {
async_std::task::block_on(async {
let a: Box<[f32]> = Box::new([0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0]);
let b: Box<[f32]> = Box::new([0.0, 5.0, 0.0, 6.0, 0.0, 7.0, 0.0, 8.0]);
let mut a_bytes: Box<[u8]> = Box::new([0; 32]);
let mut b_bytes: Box<[u8]> = Box::new([0; 32]);
unsafe {
copy_nonoverlapping(
Box::as_ptr(&a) as *const u8,
Box::as_mut_ptr(&mut a_bytes) as *mut u8,
32,
);
copy_nonoverlapping(
Box::as_ptr(&b) as *const u8,
Box::as_mut_ptr(&mut b_bytes) as *mut u8,
32,
);
};
let c = dot(8, a_bytes, b_bytes).await;
let a: [f32; 8] = [0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0];
let b: [f32; 8] = [0.0, 5.0, 0.0, 6.0, 0.0, 7.0, 0.0, 8.0];
let a = HerculesBox::from_slice(&a);
let b = HerculesBox::from_slice(&b);
let c = dot(8, a, b).await;
println!("{}", c);
assert_eq!(c, 70.0);
});
......
......@@ -10,6 +10,7 @@ juno_build = { path = "../../juno_build" }
[dependencies]
clap = { version = "*", features = ["derive"] }
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
rand = "*"
async-std = "*"
with_builtin_macros = "0.1.0"
#![feature(box_as_ptr, let_chains)]
extern crate async_std;
extern crate hercules_rt;
extern crate juno_build;
extern crate rand;
use core::ptr::copy_nonoverlapping;
use rand::random;
use hercules_rt::HerculesBox;
juno_build::juno!("matmul");
fn main() {
......@@ -15,31 +16,8 @@ fn main() {
const I: usize = 256;
const J: usize = 64;
const K: usize = 128;
let a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect();
let b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect();
let mut a_bytes: Box<[u8]> = Box::new([0; I * J * 4]);
let mut b_bytes: Box<[u8]> = Box::new([0; J * K * 4]);
unsafe {
copy_nonoverlapping(
Box::as_ptr(&a) as *const u8,
Box::as_mut_ptr(&mut a_bytes) as *mut u8,
I * J * 4,
);
copy_nonoverlapping(
Box::as_ptr(&b) as *const u8,
Box::as_mut_ptr(&mut b_bytes) as *mut u8,
J * K * 4,
);
};
let c_bytes = matmul(I as u64, J as u64, K as u64, a_bytes, b_bytes).await;
let mut c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
unsafe {
copy_nonoverlapping(
Box::as_ptr(&c_bytes) as *const u8,
Box::as_mut_ptr(&mut c) as *mut u8,
I * K * 4,
);
};
let mut a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect();
let mut b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect();
let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
for i in 0..I {
for k in 0..K {
......@@ -48,7 +26,10 @@ fn main() {
}
}
}
assert_eq!(c, correct_c);
let a = HerculesBox::from_slice_mut(&mut a);
let b = HerculesBox::from_slice_mut(&mut b);
let c = matmul(I as u64, J as u64, K as u64, a, b).await;
assert_eq!(c.as_slice::<i32>(), &*correct_c);
});
}
......
......@@ -13,5 +13,6 @@ juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
with_builtin_macros = "0.1.0"
async-std = "*"
......@@ -13,5 +13,6 @@ juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
with_builtin_macros = "0.1.0"
async-std = "*"
......@@ -13,6 +13,7 @@ juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
with_builtin_macros = "0.1.0"
async-std = "*"
rand = "*"
#![feature(future_join, box_as_ptr, let_chains)]
#![feature(box_as_ptr, let_chains)]
extern crate async_std;
extern crate hercules_rt;
extern crate juno_build;
extern crate rand;
use core::ptr::copy_nonoverlapping;
use rand::random;
use hercules_rt::HerculesBox;
juno_build::juno!("matmul");
fn main() {
......@@ -17,45 +18,6 @@ fn main() {
const K: usize = 128;
let a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect();
let b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect();
let mut a_bytes: Box<[u8]> = Box::new([0; I * J * 4]);
let mut b_bytes: Box<[u8]> = Box::new([0; J * K * 4]);
unsafe {
copy_nonoverlapping(
Box::as_ptr(&a) as *const u8,
Box::as_mut_ptr(&mut a_bytes) as *mut u8,
I * J * 4,
);
copy_nonoverlapping(
Box::as_ptr(&b) as *const u8,
Box::as_mut_ptr(&mut b_bytes) as *mut u8,
J * K * 4,
);
};
let c_bytes = matmul(
I as u64,
J as u64,
K as u64,
a_bytes.clone(),
b_bytes.clone(),
)
.await;
let mut c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
unsafe {
copy_nonoverlapping(
Box::as_ptr(&c_bytes) as *const u8,
Box::as_mut_ptr(&mut c) as *mut u8,
I * K * 4,
);
};
let tiled_c_bytes = tiled_64_matmul(I as u64, J as u64, K as u64, a_bytes, b_bytes).await;
let mut tiled_c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
unsafe {
copy_nonoverlapping(
Box::as_ptr(&tiled_c_bytes) as *const u8,
Box::as_mut_ptr(&mut tiled_c) as *mut u8,
I * K * 4,
);
};
let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect();
for i in 0..I {
for k in 0..K {
......@@ -64,8 +26,18 @@ fn main() {
}
}
}
assert_eq!(c, correct_c);
assert_eq!(tiled_c, correct_c);
let c = {
let a = HerculesBox::from_slice(&a);
let b = HerculesBox::from_slice(&b);
matmul(I as u64, J as u64, K as u64, a, b).await
};
let tiled_c = {
let a = HerculesBox::from_slice(&a);
let b = HerculesBox::from_slice(&b);
tiled_64_matmul(I as u64, J as u64, K as u64, a, b).await
};
assert_eq!(c.as_slice::<i32>(), &*correct_c);
assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c);
});
}
......@@ -73,3 +45,4 @@ fn main() {
fn matmul_test() {
main();
}
......@@ -13,5 +13,6 @@ juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
with_builtin_macros = "0.1.0"
async-std = "*"
#![feature(box_as_ptr, let_chains)]
extern crate async_std;
extern crate hercules_rt;
extern crate juno_build;
use core::ptr::copy_nonoverlapping;
use hercules_rt::HerculesBox;
juno_build::juno!("nested_ccp");
fn main() {
async_std::task::block_on(async {
let a: Box<[f32]> = Box::new([17.0, 18.0, 19.0]);
let b: Box<[i32]> = Box::new([12, 16, 4, 18, 23, 56, 93, 22, 14]);
let mut a_bytes: Box<[u8]> = Box::new([0; 12]);
let mut b_bytes: Box<[u8]> = Box::new([0; 36]);
unsafe {
copy_nonoverlapping(
Box::as_ptr(&a) as *const u8,
Box::as_mut_ptr(&mut a_bytes) as *mut u8,
12,
);
copy_nonoverlapping(
Box::as_ptr(&b) as *const u8,
Box::as_mut_ptr(&mut b_bytes) as *mut u8,
36,
);
};
let output_example = ccp_example(a_bytes).await;
let output_median = median_array(9, b_bytes).await;
let mut a: Box<[f32]> = Box::new([17.0, 18.0, 19.0]);
let mut b: Box<[i32]> = Box::new([12, 16, 4, 18, 23, 56, 93, 22, 14]);
let a = HerculesBox::from_slice_mut(&mut a);
let b = HerculesBox::from_slice_mut(&mut b);
let output_example = ccp_example(a).await;
let output_median = median_array(9, b).await;
println!("{}", output_example);
println!("{}", output_median);
assert_eq!(output_example, 1.0);
......
......@@ -13,5 +13,6 @@ juno_build = { path = "../../juno_build" }
[dependencies]
juno_build = { path = "../../juno_build" }
hercules_rt = { path = "../../hercules_rt" }
with_builtin_macros = "0.1.0"
async-std = "*"
#![feature(box_as_ptr, let_chains)]
extern crate async_std;
extern crate hercules_rt;
extern crate juno_build;
use core::ptr::copy_nonoverlapping;
use hercules_rt::HerculesBox;
juno_build::juno!("simple3");
fn main() {
async_std::task::block_on(async {
let a: Box<[u32]> = Box::new([1, 2, 3, 4, 5, 6, 7, 8]);
let b: Box<[u32]> = Box::new([8, 7, 6, 5, 4, 3, 2, 1]);
let mut a_bytes: Box<[u8]> = Box::new([0; 32]);
let mut b_bytes: Box<[u8]> = Box::new([0; 32]);
unsafe {
copy_nonoverlapping(
Box::as_ptr(&a) as *const u8,
Box::as_mut_ptr(&mut a_bytes) as *mut u8,
32,
);
copy_nonoverlapping(
Box::as_ptr(&b) as *const u8,
Box::as_mut_ptr(&mut b_bytes) as *mut u8,
32,
);
};
let c = simple3(8, a_bytes, b_bytes).await;
let mut a: Box<[u32]> = Box::new([1, 2, 3, 4, 5, 6, 7, 8]);
let mut b: Box<[u32]> = Box::new([8, 7, 6, 5, 4, 3, 2, 1]);
let a = HerculesBox::from_slice_mut(&mut a);
let b = HerculesBox::from_slice_mut(&mut b);
let c = simple3(8, a, b).await;
println!("{}", c);
assert_eq!(c, 120);
});
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment