Skip to content
Snippets Groups Projects

Set up cava benchmark

Merged rarbore2 requested to merge cava_opt_3 into main
Files changed: 7 (+35 / −13 lines)
#![feature(once_cell_try)]
#![feature(once_cell_try, pointer_is_aligned_to)]
use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::OnceLock;
use aligned_vec::AVec;
/*
* Define supporting types, functions, and macros for Hercules RT functions. For
* a more in-depth discussion of the design of these utilities, see hercules_cg/
@@ -189,6 +190,7 @@ pub struct CUDABox {
impl<'a> HerculesCPURef<'a> {
pub fn from_slice<T>(slice: &'a [T]) -> Self {
assert!(slice.as_ptr().is_aligned_to(32));
let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
let size = slice.len() * size_of::<T>();
Self {
@@ -201,7 +203,6 @@ impl<'a> HerculesCPURef<'a> {
/// Reinterpret the referenced byte buffer as a typed slice `&'a [T]`,
/// consuming this ref but keeping the original borrow's lifetime `'a`.
///
/// # Panics
/// Panics if the stored byte size is not a whole number of `T` elements,
/// or if the base pointer is not aligned for `T`.
pub fn as_slice<T>(self) -> &'a [T] {
let ptr = self.ptr.as_ptr() as *const T;
// The byte length must divide evenly into `T`-sized elements.
assert_eq!(self.size % size_of::<T>(), 0);
// `from_raw_parts` requires the pointer to be aligned for `T`.
assert!(ptr.is_aligned());
// SAFETY: `ptr` is non-null (it comes from a `NonNull`), aligned for `T`
// (checked just above), and the constructors tie `self.size` bytes to the
// borrow lifetime `'a` — so the produced slice cannot outlive its data.
unsafe { from_raw_parts(ptr, self.size / size_of::<T>()) }
}
@@ -214,6 +215,7 @@ impl<'a> HerculesCPURef<'a> {
}
pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
assert!(ptr.is_aligned_to(32));
Self {
ptr: NonNull::new(ptr).unwrap(),
size,
@@ -224,6 +226,7 @@ impl<'a> HerculesCPURef<'a> {
impl<'a> HerculesCPURefMut<'a> {
pub fn from_slice<T>(slice: &'a mut [T]) -> Self {
assert!(slice.as_ptr().is_aligned_to(32));
let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
let size = slice.len() * size_of::<T>();
Self {
@@ -257,6 +260,7 @@ impl<'a> HerculesCPURefMut<'a> {
}
pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
assert!(ptr.is_aligned_to(32));
Self {
ptr: NonNull::new(ptr).unwrap(),
size,
@@ -268,6 +272,7 @@ impl<'a> HerculesCPURefMut<'a> {
#[cfg(feature = "cuda")]
impl<'a> HerculesCUDARef<'a> {
pub fn to_cpu_ref<'b, T>(self, dst: &'b mut [T]) -> HerculesCPURefMut<'b> {
assert!(dst.as_ptr().is_aligned_to(32));
unsafe {
let size = self.size;
assert_eq!(size, dst.len() * size_of::<T>());
@@ -309,6 +314,7 @@ impl<'a> HerculesCUDARefMut<'a> {
}
pub fn to_cpu_ref<'b, T>(self, dst: &mut [T]) -> HerculesCPURefMut<'b> {
assert!(dst.as_ptr().is_aligned_to(32));
unsafe {
let size = self.size;
let ptr = NonNull::new(dst.as_ptr() as *mut u8).unwrap();
@@ -458,7 +464,7 @@ unsafe impl Sync for __RawPtrSendSync {}
*/
pub struct HerculesImmBox<'a, T> {
#[allow(dead_code)]
cpu_alloc: OnceLock<Vec<T>>,
cpu_alloc: OnceLock<AVec<T>>,
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock<CUDABox>,
@@ -467,16 +473,32 @@ pub struct HerculesImmBox<'a, T> {
cuda_ref: OnceLock<HerculesCUDARef<'a>>,
}
impl<'a, T> From<&'a [T]> for HerculesImmBox<'a, T> {
impl<'a, T: Clone> From<&'a [T]> for HerculesImmBox<'a, T> {
fn from(value: &'a [T]) -> Self {
HerculesImmBox {
cpu_alloc: OnceLock::new(),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(HerculesCPURef::from_slice(value)),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
if value.as_ptr().is_aligned_to(32) {
HerculesImmBox {
cpu_alloc: OnceLock::new(),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(HerculesCPURef::from_slice(value)),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
}
} else {
let cpu_alloc = AVec::from_slice(32, value);
let size = value.len() * size_of::<T>();
let cpu_ref =
unsafe { HerculesCPURef::__from_parts(cpu_alloc.as_ptr() as *mut u8, size) };
HerculesImmBox {
cpu_alloc: OnceLock::from(cpu_alloc),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(cpu_ref),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
}
}
}
}
Loading