
Set up cava benchmark

Merged. rarbore2 requested to merge cava_opt_3 into main.

Files changed: 6   +28 −7
 #![feature(once_cell_try, pointer_is_aligned_to)]
-use std::alloc::{alloc, dealloc, Layout};
+use std::alloc::{alloc, dealloc, GlobalAlloc, Layout, System};
 use std::marker::PhantomData;
 use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
 use std::slice::{from_raw_parts, from_raw_parts_mut};
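Note: is_aligned_to is the nightly pointer method enabled by the pointer_is_aligned_to feature gate above; it returns true when the pointer's address is a multiple of the given power-of-two alignment. A minimal standalone sketch of what the new assertions rely on (the values 8 and 32 here are only illustrative, not the crate's actual LARGEST_ALIGNMENT):

#![feature(pointer_is_aligned_to)]

fn main() {
    let data: Vec<u64> = vec![0; 8];
    let p = data.as_ptr();
    // True iff the address is a multiple of the given power of two;
    // passing a non-power-of-two alignment panics.
    assert!(p.is_aligned_to(8));
    println!("32-byte aligned: {}", p.is_aligned_to(32));
}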
@@ -189,7 +189,7 @@ pub struct CUDABox {
 impl<'a> HerculesCPURef<'a> {
     pub fn from_slice<T>(slice: &'a [T]) -> Self {
-        assert!(slice.as_ptr().is_aligned_to(32));
+        assert!(slice.as_ptr().is_aligned_to(LARGEST_ALIGNMENT));
         let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
         let size = slice.len() * size_of::<T>();
         Self {
@@ -214,7 +214,7 @@ impl<'a> HerculesCPURef<'a> {
     }

     pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
-        assert!(ptr.is_aligned_to(32));
+        assert!(ptr.is_aligned_to(LARGEST_ALIGNMENT));
         Self {
             ptr: NonNull::new(ptr).unwrap(),
             size,
@@ -225,7 +225,7 @@ impl<'a> HerculesCPURef<'a> {
 impl<'a> HerculesCPURefMut<'a> {
     pub fn from_slice<T>(slice: &'a mut [T]) -> Self {
-        assert!(slice.as_ptr().is_aligned_to(32));
+        assert!(slice.as_ptr().is_aligned_to(LARGEST_ALIGNMENT));
         let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) };
         let size = slice.len() * size_of::<T>();
         Self {
@@ -259,7 +259,7 @@ impl<'a> HerculesCPURefMut<'a> {
     }

     pub unsafe fn __from_parts(ptr: *mut u8, size: usize) -> Self {
-        assert!(ptr.is_aligned_to(32));
+        assert!(ptr.is_aligned_to(LARGEST_ALIGNMENT));
         Self {
             ptr: NonNull::new(ptr).unwrap(),
             size,
@@ -271,7 +271,7 @@ impl<'a> HerculesCPURefMut<'a> {
 #[cfg(feature = "cuda")]
 impl<'a> HerculesCUDARef<'a> {
     pub fn to_cpu_ref<'b, T>(self, dst: &'b mut [T]) -> HerculesCPURefMut<'b> {
-        assert!(dst.as_ptr().is_aligned_to(32));
+        assert!(dst.as_ptr().is_aligned_to(LARGEST_ALIGNMENT));
         unsafe {
             let size = self.size;
             assert_eq!(size, dst.len() * size_of::<T>());
@@ -313,7 +313,7 @@ impl<'a> HerculesCUDARefMut<'a> {
     }

     pub fn to_cpu_ref<'b, T>(self, dst: &mut [T]) -> HerculesCPURefMut<'b> {
-        assert!(dst.as_ptr().is_aligned_to(32));
+        assert!(dst.as_ptr().is_aligned_to(LARGEST_ALIGNMENT));
         unsafe {
             let size = self.size;
             let ptr = NonNull::new(dst.as_ptr() as *mut u8).unwrap();
@@ -872,3 +872,24 @@ impl<'a, T> HerculesRefInto<'a> for Box<[T]> {
         HerculesCPURef::from_slice(self)
     }
 }
+
+/*
+ * We need all allocations to be aligned to LARGEST_ALIGNMENT bytes for
+ * vectorization. This is the easiest way to do that.
+ */
+pub struct AlignedAlloc;
+
+unsafe impl GlobalAlloc for AlignedAlloc {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap();
+        System.alloc(layout)
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap();
+        System.dealloc(ptr, layout)
+    }
+}
+
+#[global_allocator]
+static A: AlignedAlloc = AlignedAlloc;
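For reference, here is the same global-allocator trick as a self-contained sketch, assuming a hypothetical LARGEST_ALIGNMENT of 64 bytes (the real constant is defined elsewhere in this crate and its value is not shown in this diff). Over-aligning every heap allocation before delegating to the system allocator is what lets ordinary Vec/Box buffers pass the stricter is_aligned_to(LARGEST_ALIGNMENT) assertions above without any per-call-site changes:

use std::alloc::{GlobalAlloc, Layout, System};

// Hypothetical stand-in; the real LARGEST_ALIGNMENT lives elsewhere in the crate.
const LARGEST_ALIGNMENT: usize = 64;

pub struct AlignedAlloc;

unsafe impl GlobalAlloc for AlignedAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // Raise the requested alignment so every allocation is suitably
        // aligned for vectorized code, then delegate to the system allocator.
        let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap();
        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // Deallocate with the same over-aligned layout used at allocation time.
        let layout = layout.align_to(LARGEST_ALIGNMENT).unwrap();
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static A: AlignedAlloc = AlignedAlloc;

fn main() {
    let v: Vec<f32> = vec![0.0; 1024];
    // With the allocator above, the backing buffer is 64-byte aligned.
    assert_eq!(v.as_ptr() as usize % LARGEST_ALIGNMENT, 0);
    println!("buffer at {:p} is {}-byte aligned", v.as_ptr(), LARGEST_ALIGNMENT);
}

A crate can register only one #[global_allocator], so this sketch stands alone rather than alongside the allocator added in the diff.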