Skip to content
Snippets Groups Projects

Add CUDA support to HerculesBox

Merged rarbore2 requested to merge cudart into main
6 files
+ 182
− 25
Compare changes
  • Side-by-side
  • Inline
Files
6
+ 116
− 21
@@ -4,6 +4,16 @@ use std::mem::swap;
use std::ptr::{copy_nonoverlapping, NonNull};
use std::slice::from_raw_parts;
// FFI surface implemented by the CUDA runtime shim; only linked when the
// "cuda" feature is enabled.
#[cfg(feature = "cuda")]
extern "C" {
    // Allocate `size` bytes of device memory. The Rust callers treat a null
    // return as allocation failure (see `allocate_cuda`).
    fn cuda_alloc(size: usize) -> *mut u8;
    // Allocate `size` bytes of zero-initialized device memory.
    fn cuda_alloc_zeroed(size: usize) -> *mut u8;
    // Free a pointer previously returned by `cuda_alloc` / `cuda_alloc_zeroed`.
    fn cuda_dealloc(ptr: *mut u8);
    // Source pointers are only read, so declare them `*const u8`; existing
    // `*mut u8` arguments coerce implicitly at every call site, so this is
    // backward compatible and ABI-identical.
    fn copy_cpu_to_cuda(dst: *mut u8, src: *const u8, size: usize);
    fn copy_cuda_to_cpu(dst: *mut u8, src: *const u8, size: usize);
    fn copy_cuda_to_cuda(dst: *mut u8, src: *const u8, size: usize);
}
/*
* An in-memory collection object that can be used by functions compiled by the
* Hercules compiler.
@@ -13,16 +23,23 @@ pub struct HerculesBox<'a> {
cpu_exclusive: Option<NonNull<u8>>,
cpu_owned: Option<NonNull<u8>>,
#[cfg(feature = "cuda")]
cuda_owned: Option<NonNull<u8>>,
size: usize,
_phantom: PhantomData<&'a u8>,
}
impl<'a> HerculesBox<'a> {
impl<'b, 'a: 'b> HerculesBox<'a> {
/// Wrap a read-only borrowed slice in a `HerculesBox` without allocating
/// or copying; the box records only the pointer and the byte length.
pub fn from_slice<T>(slice: &'a [T]) -> Self {
    HerculesBox {
        // SAFETY: a slice's data pointer is never null, so
        // `new_unchecked` is sound here.
        cpu_shared: Some(unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) }),
        cpu_exclusive: None,
        cpu_owned: None,
        #[cfg(feature = "cuda")]
        cuda_owned: None,
        // `size` is tracked in bytes, not elements.
        size: slice.len() * size_of::<T>(),
        _phantom: PhantomData,
    }
@@ -33,36 +50,69 @@ impl<'a> HerculesBox<'a> {
cpu_shared: None,
cpu_exclusive: Some(unsafe { NonNull::new_unchecked(slice.as_mut_ptr() as *mut u8) }),
cpu_owned: None,
#[cfg(feature = "cuda")]
cuda_owned: None,
size: slice.len() * size_of::<T>(),
_phantom: PhantomData,
}
}
pub fn as_slice<T>(&'a self) -> &'a [T] {
pub fn as_slice<T>(&'b mut self) -> &'b [T] {
assert_eq!(self.size % size_of::<T>(), 0);
unsafe { from_raw_parts(self.__cpu_ptr() as *const T, self.size / size_of::<T>()) }
}
unsafe fn into_cpu(&self) -> NonNull<u8> {
self.cpu_shared
.or(self.cpu_exclusive)
.or(self.cpu_owned)
.unwrap()
unsafe fn get_cpu_ptr(&self) -> Option<NonNull<u8>> {
self.cpu_owned.or(self.cpu_exclusive).or(self.cpu_shared)
}
// Return the device-side pointer, if this box currently owns a CUDA
// allocation; `None` when the data lives only on the CPU.
#[cfg(feature = "cuda")]
unsafe fn get_cuda_ptr(&self) -> Option<NonNull<u8>> {
    self.cuda_owned
}
unsafe fn into_cpu_mut(&mut self) -> NonNull<u8> {
if let Some(ptr) = self.cpu_exclusive.or(self.cpu_owned) {
unsafe fn allocate_cpu(&mut self) -> NonNull<u8> {
if let Some(ptr) = self.cpu_owned {
ptr
} else {
let ptr =
NonNull::new(alloc(Layout::from_size_align_unchecked(self.size, 16))).unwrap();
copy_nonoverlapping(self.cpu_shared.unwrap().as_ptr(), ptr.as_ptr(), self.size);
self.cpu_owned = Some(ptr);
self.cpu_shared = None;
ptr
}
}
/// Ensure an owned CUDA buffer exists for this box and return it.
/// Panics if the device allocator returns null (allocation failure).
#[cfg(feature = "cuda")]
unsafe fn allocate_cuda(&mut self) -> NonNull<u8> {
    if let Some(ptr) = self.cuda_owned {
        ptr
    } else {
        // Check for null once, store the NonNull, and return it directly —
        // the original stored `Some(...)` and then immediately re-read
        // `self.cuda_owned.unwrap()`, a redundant second unwrap.
        let ptr = NonNull::new(cuda_alloc(self.size)).unwrap();
        self.cuda_owned = Some(ptr);
        ptr
    }
}
/// Free the owned CPU buffer, if any, and clear the owning field so the
/// pointer cannot be freed twice. Borrowed pointers are left untouched.
unsafe fn deallocate_cpu(&mut self) {
    // `take()` moves the pointer out and leaves `None` behind in one step.
    if let Some(owned) = self.cpu_owned.take() {
        let layout = Layout::from_size_align_unchecked(self.size, 16);
        dealloc(owned.as_ptr(), layout);
    }
}
/// Free the owned CUDA buffer, if any, and clear the owning field so a
/// later call (or `drop`) cannot free it again.
#[cfg(feature = "cuda")]
unsafe fn deallocate_cuda(&mut self) {
    // `take()` clears `cuda_owned` and hands us the pointer in one step.
    if let Some(dev) = self.cuda_owned.take() {
        cuda_dealloc(dev.as_ptr());
    }
}
pub unsafe fn __zeros(size: u64) -> Self {
assert_ne!(size, 0);
let size = size as usize;
@@ -72,6 +122,10 @@ impl<'a> HerculesBox<'a> {
cpu_owned: Some(
NonNull::new(alloc_zeroed(Layout::from_size_align_unchecked(size, 16))).unwrap(),
),
#[cfg(feature = "cuda")]
cuda_owned: None,
size: size,
_phantom: PhantomData,
}
@@ -82,6 +136,10 @@ impl<'a> HerculesBox<'a> {
cpu_shared: None,
cpu_exclusive: None,
cpu_owned: None,
#[cfg(feature = "cuda")]
cuda_owned: None,
size: 0,
_phantom: PhantomData,
}
@@ -93,24 +151,61 @@ impl<'a> HerculesBox<'a> {
ret
}
pub unsafe fn __cpu_ptr(&self) -> *mut u8 {
self.into_cpu().as_ptr()
pub unsafe fn __cpu_ptr(&mut self) -> *mut u8 {
if let Some(ptr) = self.get_cpu_ptr() {
return ptr.as_ptr();
}
#[cfg(feature = "cuda")]
{
let cuda_ptr = self.get_cuda_ptr().unwrap();
let cpu_ptr = self.allocate_cpu();
copy_cuda_to_cpu(cpu_ptr.as_ptr(), cuda_ptr.as_ptr(), self.size);
return cpu_ptr.as_ptr();
}
panic!()
}
pub unsafe fn __cpu_ptr_mut(&mut self) -> *mut u8 {
self.into_cpu_mut().as_ptr()
let cpu_ptr = self.__cpu_ptr();
if Some(cpu_ptr) == self.cpu_shared.map(|nn| nn.as_ptr()) {
self.allocate_cpu();
copy_nonoverlapping(cpu_ptr, self.cpu_owned.unwrap().as_ptr(), self.size);
}
self.cpu_shared = None;
self.cpu_exclusive = None;
#[cfg(feature = "cuda")]
self.deallocate_cuda();
cpu_ptr
}
/// Get a device pointer to the data, uploading the CPU copy to CUDA
/// memory first if no device copy exists yet.
#[cfg(feature = "cuda")]
pub unsafe fn __cuda_ptr(&mut self) -> *mut u8 {
    match self.get_cuda_ptr() {
        Some(dev) => dev.as_ptr(),
        None => {
            // Data currently lives only on the host; mirror it to the device.
            let host = self.get_cpu_ptr().unwrap();
            let dev = self.allocate_cuda();
            copy_cpu_to_cuda(dev.as_ptr(), host.as_ptr(), self.size);
            dev.as_ptr()
        }
    }
}
/// Get a writable device pointer, taking exclusive ownership on the
/// device: every CPU-side borrow and owned buffer is dropped afterwards
/// so later reads must come back from CUDA memory.
#[cfg(feature = "cuda")]
pub unsafe fn __cuda_ptr_mut(&mut self) -> *mut u8 {
    // Ensure a device copy exists first, then invalidate the host side.
    let dev_ptr = self.__cuda_ptr();
    self.cpu_shared = None;
    self.cpu_exclusive = None;
    self.deallocate_cpu();
    dev_ptr
}
}
impl<'a> Drop for HerculesBox<'a> {
fn drop(&mut self) {
if let Some(ptr) = self.cpu_owned {
unsafe {
dealloc(
ptr.as_ptr(),
Layout::from_size_align_unchecked(self.size, 16),
)
}
unsafe {
self.deallocate_cpu();
#[cfg(feature = "cuda")]
self.deallocate_cuda();
}
}
}
Loading