Skip to content
Snippets Groups Projects
Commit 69d7a09c authored by Russel Arbore
Browse files

assert hercules cpu refs are given aligned pointers

parent 7c9fe86e
No related branches found
No related tags found
1 merge request: !190 "Set up cava benchmark"
Pipeline #201762 failed
This commit is part of merge request !190. Comments created here will be created in the context of that merge request.
#![feature(once_cell_try)] #![feature(once_cell_try, pointer_is_aligned_to)]
use std::alloc::{alloc, dealloc, Layout}; use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData; use std::marker::PhantomData;
...@@ -189,6 +189,7 @@ pub struct CUDABox { ...@@ -189,6 +189,7 @@ pub struct CUDABox {
impl<'a> HerculesCPURef<'a> { impl<'a> HerculesCPURef<'a> {
pub fn from_slice<T>(slice: &'a [T]) -> Self { pub fn from_slice<T>(slice: &'a [T]) -> Self {
assert!(slice.as_ptr().is_aligned_to(32));
let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) }; let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
let size = slice.len() * size_of::<T>(); let size = slice.len() * size_of::<T>();
Self { Self {
...@@ -201,7 +202,6 @@ impl<'a> HerculesCPURef<'a> { ...@@ -201,7 +202,6 @@ impl<'a> HerculesCPURef<'a> {
pub fn as_slice<T>(self) -> &'a [T] { pub fn as_slice<T>(self) -> &'a [T] {
let ptr = self.ptr.as_ptr() as *const T; let ptr = self.ptr.as_ptr() as *const T;
assert_eq!(self.size % size_of::<T>(), 0); assert_eq!(self.size % size_of::<T>(), 0);
assert!(ptr.is_aligned());
unsafe { from_raw_parts(ptr, self.size / size_of::<T>()) } unsafe { from_raw_parts(ptr, self.size / size_of::<T>()) }
} }
...@@ -214,6 +214,7 @@ impl<'a> HerculesCPURef<'a> { ...@@ -214,6 +214,7 @@ impl<'a> HerculesCPURef<'a> {
} }
pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self { pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
assert!(ptr.is_aligned_to(32));
Self { Self {
ptr: NonNull::new(ptr).unwrap(), ptr: NonNull::new(ptr).unwrap(),
size, size,
...@@ -224,6 +225,7 @@ impl<'a> HerculesCPURef<'a> { ...@@ -224,6 +225,7 @@ impl<'a> HerculesCPURef<'a> {
impl<'a> HerculesCPURefMut<'a> { impl<'a> HerculesCPURefMut<'a> {
pub fn from_slice<T>(slice: &'a mut [T]) -> Self { pub fn from_slice<T>(slice: &'a mut [T]) -> Self {
assert!(slice.as_ptr().is_aligned_to(32));
let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) }; let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
let size = slice.len() * size_of::<T>(); let size = slice.len() * size_of::<T>();
Self { Self {
...@@ -257,6 +259,7 @@ impl<'a> HerculesCPURefMut<'a> { ...@@ -257,6 +259,7 @@ impl<'a> HerculesCPURefMut<'a> {
} }
pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self { pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
assert!(ptr.is_aligned_to(32));
Self { Self {
ptr: NonNull::new(ptr).unwrap(), ptr: NonNull::new(ptr).unwrap(),
size, size,
...@@ -268,6 +271,7 @@ impl<'a> HerculesCPURefMut<'a> { ...@@ -268,6 +271,7 @@ impl<'a> HerculesCPURefMut<'a> {
#[cfg(feature = "cuda")] #[cfg(feature = "cuda")]
impl<'a> HerculesCUDARef<'a> { impl<'a> HerculesCUDARef<'a> {
pub fn to_cpu_ref<'b, T>(self, dst: &'b mut [T]) -> HerculesCPURefMut<'b> { pub fn to_cpu_ref<'b, T>(self, dst: &'b mut [T]) -> HerculesCPURefMut<'b> {
assert!(dst.as_ptr().is_aligned_to(32));
unsafe { unsafe {
let size = self.size; let size = self.size;
assert_eq!(size, dst.len() * size_of::<T>()); assert_eq!(size, dst.len() * size_of::<T>());
...@@ -309,6 +313,7 @@ impl<'a> HerculesCUDARefMut<'a> { ...@@ -309,6 +313,7 @@ impl<'a> HerculesCUDARefMut<'a> {
} }
pub fn to_cpu_ref<'b, T>(self, dst: &mut [T]) -> HerculesCPURefMut<'b> { pub fn to_cpu_ref<'b, T>(self, dst: &mut [T]) -> HerculesCPURefMut<'b> {
assert!(dst.as_ptr().is_aligned_to(32));
unsafe { unsafe {
let size = self.size; let size = self.size;
let ptr = NonNull::new(dst.as_ptr() as *mut u8).unwrap(); let ptr = NonNull::new(dst.as_ptr() as *mut u8).unwrap();
......
#![feature(concat_idents)] #![feature(concat_idents)]
use std::iter::zip;
use rand::random; use rand::random;
#[cfg(feature = "cuda")] use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
use hercules_rt::CUDABox;
use hercules_rt::{runner, HerculesCPURef};
juno_build::juno!("matmul"); juno_build::juno!("matmul");
...@@ -13,9 +12,9 @@ fn main() { ...@@ -13,9 +12,9 @@ fn main() {
const I: usize = 256; const I: usize = 256;
const J: usize = 8; // hardcoded constant in matmul.hir const J: usize = 8; // hardcoded constant in matmul.hir
const K: usize = 128; const K: usize = 128;
let mut a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect(); let a: Box<[f32]> = (0..I * J).map(|_| random::<f32>()).collect();
let mut b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect(); let b: Box<[f32]> = (0..J * K).map(|_| random::<f32>()).collect();
let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect(); let mut correct_c: Box<[f32]> = (0..I * K).map(|_| 0.0).collect();
for i in 0..I { for i in 0..I {
for k in 0..K { for k in 0..K {
for j in 0..J { for j in 0..J {
...@@ -23,25 +22,12 @@ fn main() { ...@@ -23,25 +22,12 @@ fn main() {
} }
} }
} }
#[cfg(not(feature = "cuda"))] let a = HerculesImmBox::from(&a as &[f32]);
{ let b = HerculesImmBox::from(&b as &[f32]);
let a = HerculesCPURef::from_slice(&mut a); let mut r = runner!(matmul);
let b = HerculesCPURef::from_slice(&mut b); let mut c = HerculesMutBox::from(r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await);
let mut r = runner!(matmul); for (calc, correct) in zip(c.as_slice().into_iter().map(|x: &mut f32| *x), correct_c) {
let c = r.run(I as u64, J as u64, K as u64, a, b).await; assert!((calc - correct).abs() < 0.0001, "{} != {}", calc, correct);
assert_eq!(c.as_slice::<i32>(), &*correct_c);
}
#[cfg(feature = "cuda")]
{
let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a));
let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b));
let mut r = runner!(matmul);
let c = r
.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref())
.await;
let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice();
c.to_cpu_ref(&mut c_cpu);
assert_eq!(&*c_cpu, &*correct_c);
} }
}); });
} }
......
...@@ -3,9 +3,7 @@ use std::iter::zip; ...@@ -3,9 +3,7 @@ use std::iter::zip;
use rand::random; use rand::random;
use hercules_rt::{runner, HerculesRefInto}; use hercules_rt::{runner, HerculesImmBox, HerculesImmBoxTo, HerculesMutBox};
#[cfg(feature = "cuda")]
use hercules_rt::{CUDABox, HerculesCPURef};
juno_build::juno!("matmul"); juno_build::juno!("matmul");
...@@ -24,26 +22,12 @@ fn main() { ...@@ -24,26 +22,12 @@ fn main() {
} }
} }
} }
#[cfg(not(feature = "cuda"))] let a = HerculesImmBox::from(&a as &[f32]);
{ let b = HerculesImmBox::from(&b as &[f32]);
let mut r = runner!(matmul); let mut r = runner!(matmul);
let c = r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await; let mut c = HerculesMutBox::from(r.run(I as u64, J as u64, K as u64, a.to(), b.to()).await);
let c = c.as_slice::<f32>(); for (calc, correct) in zip(c.as_slice().into_iter().map(|x: &mut f32| *x), correct_c) {
assert_eq!(c, &*correct_c); assert!((calc - correct).abs() < 0.0001, "{} != {}", calc, correct);
}
#[cfg(feature = "cuda")]
{
let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a));
let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b));
let mut r = runner!(matmul);
let c = r
.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref())
.await;
let mut c_cpu: Box<[f32]> = vec![0.0; correct_c.len()].into_boxed_slice();
c.to_cpu_ref(&mut c_cpu);
for (calc, correct) in zip(c_cpu, correct_c) {
assert!((calc - correct).abs() < 0.00001, "{} != {}", calc, correct);
}
} }
}); });
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment