Skip to content
Snippets Groups Projects

Safe rust interface

Merged Aaron Councilman requested to merge safe-rust-interface into main
Files
3
+ 353
0
 
#![feature(once_cell_try)]
 
use std::alloc::{alloc, dealloc, Layout};
use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::slice::{from_raw_parts, from_raw_parts_mut};
 
use std::sync::OnceLock;
 
/*
/*
* Define supporting types, functions, and macros for Hercules RT functions. For
* Define supporting types, functions, and macros for Hercules RT functions. For
* a more in-depth discussion of the design of these utilities, see hercules_cg/
* a more in-depth discussion of the design of these utilities, see hercules_cg/
@@ -330,6 +334,10 @@ impl CUDABox {
@@ -330,6 +334,10 @@ impl CUDABox {
_phantom: PhantomData,
_phantom: PhantomData,
}
}
}
}
 
 
/// Returns the size of this CUDA allocation in bytes.
pub fn get_bytes(&self) -> usize {
    self.size
}
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "cuda")]
@@ -354,3 +362,348 @@ macro_rules! runner {
@@ -354,3 +362,348 @@ macro_rules! runner {
<concat_idents!(HerculesRunner_, $x)>::new()
<concat_idents!(HerculesRunner_, $x)>::new()
};
};
}
}
 
 
/*
 
* A HerculesBox holds memory that can be on any device and provides a common interface to moving
 
* data where it is needed.
 
*
 
* It can hold CPU and device allocations to basically point at the memory it represents. It can
 
* also hold Hercules references either to those allocations it holds or to other allocations not
 
* held by this Box (in which case the appropriate allocation field should be None).
 
*
 
* The data held at all of its non-None allocations and references is maintained so that it is the
 
* same, and so methods will attempt to use the reference or allocation that is most convenient.
 
*
 
* HerculesImmBox holds references to immutable memory only. All operations on these are through
 
* immutable references, though internally it uses OnceLocks to protect its resources since the Box
 
* may be used in multiple parallel threads if it is used in parallel Hercules code invocation.
 
* We use OnceLocks since the data is immutable and so once it has been placed on a device movement
 
* is not necessary.
 
*
 
* We maintain the invariant that at least one of the device references is always set, their
 
* associated allocations may or may not be set, as those may not be needed if the allocation is
 
* held elsewhere.
 
*
 
* HerculesMutBox holds memory on some device and can produce mutable references to that data on
* any device. All these operations are through mutable references since this ensures exclusive
 
* access to the Box and therefore to the underlying device memory. Because of the exclusive access
 
* locks are not needed.
 
*
 
* Note that a HerculesMutBox may hold multiple allocations at once, but it tracks the "definitive"
 
* copy to be the one borrowed mutably most recently (since it may have been updated). The extra
 
* allocations are kept around to avoid reallocation if memory is moved back to the device.
 
*/
 
/// An immutable box of data that lazily materializes a copy of the data on
/// each device as it is requested.
///
/// Invariant (see module comment above): at least one of `cpu_ref`/`cuda_ref`
/// is always set. The `*_alloc` fields are only set when this box itself owns
/// the backing allocation; they stay `None`/unset when the memory is held
/// elsewhere. `OnceLock` protects each resource so the box can be shared by
/// parallel Hercules invocations.
pub struct HerculesImmBox<'a, T> {
    // CPU allocation owned by this box, set only when a host copy had to be
    // created here (in `as_cpu_ref`).
    #[allow(dead_code)]
    cpu_alloc: OnceLock<Vec<T>>,
    // CUDA allocation owned by this box, set only when a device copy had to
    // be created here (in `as_cuda_ref`).
    #[cfg(feature = "cuda")]
    cuda_alloc: OnceLock<CUDABox>,

    // Reference to the CPU copy of the data (may borrow memory held elsewhere).
    cpu_ref: OnceLock<HerculesCPURef<'a>>,
    // Reference to the CUDA copy of the data (may borrow memory held elsewhere).
    #[cfg(feature = "cuda")]
    cuda_ref: OnceLock<HerculesCUDARef<'a>>,
}
 
 
/// Wraps a borrowed CPU slice without copying; the CPU reference is set
/// eagerly and any device copy is made lazily on demand.
impl<'a, T> From<&'a [T]> for HerculesImmBox<'a, T> {
    fn from(slice: &'a [T]) -> Self {
        // `set` on a freshly created OnceLock cannot fail.
        let cpu_ref = OnceLock::new();
        let _ = cpu_ref.set(HerculesCPURef::from_slice(slice));

        Self {
            cpu_alloc: OnceLock::new(),
            #[cfg(feature = "cuda")]
            cuda_alloc: OnceLock::new(),

            cpu_ref,
            #[cfg(feature = "cuda")]
            cuda_ref: OnceLock::new(),
        }
    }
}
 
 
impl<'a, T> From<HerculesCPURef<'a>> for HerculesImmBox<'a, T> {
 
fn from(value: HerculesCPURef<'a>) -> Self {
 
HerculesImmBox {
 
cpu_alloc: OnceLock::new(),
 
#[cfg(feature = "cuda")]
 
cuda_alloc: OnceLock::new(),
 
 
cpu_ref: OnceLock::from(value),
 
#[cfg(feature = "cuda")]
 
cuda_ref: OnceLock::new(),
 
}
 
}
 
}
 
 
// If we are building from a mutable reference, we demote that to a non-mutable reference since we
 
// don't hold mutable references.
 
impl<'a, T> From<HerculesCPURefMut<'a>> for HerculesImmBox<'a, T> {
 
fn from(value: HerculesCPURefMut<'a>) -> Self {
 
HerculesImmBox {
 
cpu_alloc: OnceLock::new(),
 
#[cfg(feature = "cuda")]
 
cuda_alloc: OnceLock::new(),
 
 
cpu_ref: OnceLock::from(value.as_ref()),
 
#[cfg(feature = "cuda")]
 
cuda_ref: OnceLock::new(),
 
}
 
}
 
}
 
 
/// Wraps an existing device reference; a CPU copy is only made on demand.
#[cfg(feature = "cuda")]
impl<'a, T> From<HerculesCUDARef<'a>> for HerculesImmBox<'a, T> {
    fn from(value: HerculesCUDARef<'a>) -> Self {
        // `set` on a freshly created OnceLock cannot fail.
        let cuda_ref = OnceLock::new();
        let _ = cuda_ref.set(value);

        Self {
            cpu_alloc: OnceLock::new(),
            #[cfg(feature = "cuda")]
            cuda_alloc: OnceLock::new(),

            cpu_ref: OnceLock::new(),
            #[cfg(feature = "cuda")]
            cuda_ref,
        }
    }
}
 
 
// A mutable device reference is demoted to an immutable one, since
// HerculesImmBox never hands out mutable access.
#[cfg(feature = "cuda")]
impl<'a, T> From<HerculesCUDARefMut<'a>> for HerculesImmBox<'a, T> {
    fn from(value: HerculesCUDARefMut<'a>) -> Self {
        // Demote and reuse the immutable-reference constructor.
        Self::from(value.as_ref())
    }
}
 
 
impl<'a, T> HerculesImmBox<'a, T>
where
    T: Default + Clone
{
    /// Borrow the data as a CPU slice, transferring it from the device first
    /// if no CPU copy exists yet.
    pub fn as_slice(&'a self) -> &'a [T] {
        self.as_cpu_ref().as_slice()
    }

    /// Clone the data into a freshly allocated `Vec`, transferring it from
    /// the device first if no CPU copy exists yet.
    pub fn to_vec(&'a self) -> Vec<T> {
        Vec::from(self.as_cpu_ref().as_slice())
    }

    /// Get a CPU reference to the data, lazily copying it from the CUDA
    /// device on first use if this box only holds a device reference.
    ///
    /// Panics if the box holds no reference at all (which would violate the
    /// invariant that at least one device reference is always set).
    pub fn as_cpu_ref(&'a self) -> HerculesCPURef<'a> {
        if let Some(cpu_ref) = self.cpu_ref.get() {
            cpu_ref.clone()
        } else {
            #[cfg(feature = "cuda")]
            if let Some(cuda_ref) = self.cuda_ref.get() {
                // get_or_init ensures only one thread performs the
                // device-to-host transfer even under concurrent calls.
                return
                self.cpu_ref.get_or_init(|| {
                    // Element count derived from the device allocation's byte
                    // size; assumes the size divides evenly by size_of::<T>().
                    let elements = unsafe { cuda_ref.__size() / size_of::<T>() };

                    let mut alloc = Vec::new();
                    alloc.resize_with(elements, Default::default);
                    let _ = cuda_ref.clone().to_cpu_ref(&mut alloc);

                    // Store the host allocation in the box so the returned
                    // reference can borrow from it for lifetime 'a.
                    self.cpu_alloc.set(alloc).map_err(|_| ()).expect("HerculesImmBox cpu_alloc was set unexpectedly");
                    let alloc = self.cpu_alloc.get().unwrap();
                    HerculesCPURef::from_slice(alloc)
                }).clone();
            }

            panic!("HerculesImmBox has no reference to data")
        }
    }

    /// Get a CUDA reference to the data, lazily copying it to the device on
    /// first use if this box only holds a CPU reference.
    ///
    /// Panics if the box holds no reference at all.
    #[cfg(feature = "cuda")]
    pub fn as_cuda_ref(&'a self) -> HerculesCUDARef<'a> {
        if let Some(cuda_ref) = self.cuda_ref.get() {
            cuda_ref.clone()
        } else {
            if let Some(cpu_ref) = self.cpu_ref.get() {
                // get_or_init ensures only one thread performs the
                // host-to-device transfer even under concurrent calls.
                return self.cuda_ref.get_or_init(|| {
                    // Copy data to CUDA device
                    let alloc = CUDABox::from_cpu_ref(cpu_ref.clone());
                    self.cuda_alloc.set(alloc).map_err(|_| ()).expect("HerculesImmBox cuda_alloc was set unexpectedly");

                    // Borrow the device reference from the allocation now
                    // owned by this box.
                    self.cuda_alloc.get().unwrap().get_ref()
                }).clone();
            }

            panic!("HerculesImmBox has no reference to data")
        }
    }
}
 
 
/// Which device currently holds the definitive (most recently mutably
/// borrowed) copy of a `HerculesMutBox`'s data.
enum HerculesMutBoxLocation {
    CPU,
    #[cfg(feature = "cuda")]
    CUDA,
}
 
 
/// A mutable box of data that can live on the CPU or a CUDA device and hand
/// out mutable references on either.
///
/// All access is through `&mut self`, so no locking is needed. Stale
/// allocations are kept around so moving data back to a device can reuse
/// them instead of reallocating (see module comment above).
pub struct HerculesMutBox<T> {
    // Identifies the allocation currently holding the definitive copy.
    loc: HerculesMutBoxLocation,

    // Host allocation, if one has been created.
    cpu_alloc: Option<Vec<T>>,
    // Device allocation, if one has been created.
    #[cfg(feature = "cuda")]
    cuda_alloc: Option<CUDABox>,
}
 
 
/// Copies the slice's contents into an owned CPU allocation; the original
/// slice is only read from.
impl<T: Clone> From<&mut [T]> for HerculesMutBox<T> {
    fn from(data: &mut [T]) -> Self {
        let owned = data.to_vec();
        Self {
            loc: HerculesMutBoxLocation::CPU,
            cpu_alloc: Some(owned),
            #[cfg(feature = "cuda")]
            cuda_alloc: None,
        }
    }
}
 
 
/// Copies the referenced CPU data into an owned CPU allocation.
impl<'a, T: Clone> From<HerculesCPURef<'a>> for HerculesMutBox<T> {
    fn from(value: HerculesCPURef<'a>) -> Self {
        let owned = value.as_slice().to_vec();
        Self {
            loc: HerculesMutBoxLocation::CPU,
            cpu_alloc: Some(owned),
            #[cfg(feature = "cuda")]
            cuda_alloc: None,
        }
    }
}
 
 
/// Copies the referenced CPU data into an owned CPU allocation; the mutable
/// reference is only read from.
impl<'a, T: Clone> From<HerculesCPURefMut<'a>> for HerculesMutBox<T> {
    fn from(value: HerculesCPURefMut<'a>) -> Self {
        let owned = value.as_slice().to_vec();
        Self {
            loc: HerculesMutBoxLocation::CPU,
            cpu_alloc: Some(owned),
            #[cfg(feature = "cuda")]
            cuda_alloc: None,
        }
    }
}
 
 
/// Copies the referenced device data into an owned CUDA allocation.
#[cfg(feature = "cuda")]
impl<'a, T> From<HerculesCUDARef<'a>> for HerculesMutBox<T> {
    fn from(value: HerculesCUDARef<'a>) -> Self {
        let device_copy = CUDABox::from_cuda_ref(value);
        Self {
            loc: HerculesMutBoxLocation::CUDA,
            cpu_alloc: None,
            #[cfg(feature = "cuda")]
            cuda_alloc: Some(device_copy),
        }
    }
}
 
 
/// Copies the referenced device data into an owned CUDA allocation; the
/// mutable reference is only read from.
#[cfg(feature = "cuda")]
impl<'a, T> From<HerculesCUDARefMut<'a>> for HerculesMutBox<T> {
    fn from(value: HerculesCUDARefMut<'a>) -> Self {
        let device_copy = CUDABox::from_cuda_ref(value.as_ref());
        Self {
            loc: HerculesMutBoxLocation::CUDA,
            cpu_alloc: None,
            #[cfg(feature = "cuda")]
            cuda_alloc: Some(device_copy),
        }
    }
}
 
 
impl<T> HerculesMutBox<T>
where
    T: Default
{
    /// Mutably borrow the data as a CPU slice, transferring it from the
    /// device first if the definitive copy currently lives on the device.
    pub fn as_slice(&mut self) -> &mut [T] {
        self.as_cpu_ref().as_slice()
    }

    /// Consume the box and return the data as an owned `Vec`.
    pub fn to_vec(mut self) -> Vec<T> {
        // Bring to CPU (if needed)
        let _ = self.as_cpu_ref();
        // After as_cpu_ref, loc == CPU and cpu_alloc is guaranteed Some.
        self.cpu_alloc.unwrap()
    }

    /// Get a mutable CPU reference to the definitive copy of the data,
    /// transferring it from the CUDA device if necessary. Afterwards the CPU
    /// allocation is the definitive copy.
    pub fn as_cpu_ref<'a>(&'a mut self) -> HerculesCPURefMut<'a> {
        match self.loc {
            HerculesMutBoxLocation::CPU => {
                HerculesCPURefMut::from_slice(self.cpu_alloc.as_mut().unwrap())
            }
            #[cfg(feature = "cuda")]
            HerculesMutBoxLocation::CUDA => {
                let cuda_alloc = self.cuda_alloc.as_ref().unwrap();
                // Element count derived from the device allocation's byte
                // size; assumes it divides evenly by size_of::<T>().
                let elements = cuda_alloc.get_bytes() / size_of::<T>();

                // Allocate host memory (if needed); an existing host
                // allocation is reused only when its length matches.
                if self.cpu_alloc.is_none() || self.cpu_alloc.as_ref().unwrap().len() != elements {
                    let mut alloc = Vec::new();
                    alloc.resize_with(elements, Default::default);
                    self.cpu_alloc = Some(alloc);
                }

                // Transfer data from CUDA device
                let cpu_alloc = self.cpu_alloc.as_mut().unwrap();
                let _ = cuda_alloc.get_ref().to_cpu_ref(cpu_alloc);

                // The CPU now holds the definitive copy; the device
                // allocation is kept for potential reuse.
                self.loc = HerculesMutBoxLocation::CPU;
                HerculesCPURefMut::from_slice(self.cpu_alloc.as_mut().unwrap())
            }
        }
    }

    /// Get a mutable CUDA reference to the definitive copy of the data,
    /// transferring it to the device if necessary. Afterwards the device
    /// allocation is the definitive copy.
    #[cfg(feature = "cuda")]
    pub fn as_cuda_ref<'a>(&'a mut self) -> HerculesCUDARefMut<'a> {
        match self.loc {
            HerculesMutBoxLocation::CPU => {
                // TODO: CUDABox does not provide an interface for copying data to it, so currently
                // we just reallocate it
                let cpu_ref = HerculesCPURef::from_slice(self.cpu_alloc.as_ref().unwrap());
                let cuda_alloc = CUDABox::from_cpu_ref(cpu_ref);

                self.cuda_alloc = Some(cuda_alloc);
                self.loc = HerculesMutBoxLocation::CUDA;
                self.cuda_alloc.as_mut().unwrap().get_ref_mut()
            }
            HerculesMutBoxLocation::CUDA => {
                self.cuda_alloc.as_mut().unwrap().get_ref_mut()
            }
        }
    }
}
 
 
/// Uniform interface for borrowing the contents of a `HerculesImmBox` as a
/// particular immutable device reference type `T`.
pub trait HerculesImmBoxTo<'a, T> {
    fn to(&'a self) -> T;
}
 
 
impl<'a, T> HerculesImmBoxTo<'a, HerculesCPURef<'a>> for HerculesImmBox<'a, T>
 
where T: Default + Clone
 
{
 
fn to(&'a self) -> HerculesCPURef<'a> {
 
self.as_cpu_ref()
 
}
 
}
 
 
/// Borrow the box's contents as a CUDA reference, copying to the device
/// if necessary.
#[cfg(feature = "cuda")]
impl<'a, T> HerculesImmBoxTo<'a, HerculesCUDARef<'a>> for HerculesImmBox<'a, T>
where
    T: Default + Clone,
{
    fn to(&'a self) -> HerculesCUDARef<'a> {
        HerculesImmBox::as_cuda_ref(self)
    }
}
 
 
/// Uniform interface for mutably borrowing the contents of a
/// `HerculesMutBox` as a particular mutable device reference type `T`.
pub trait HerculesMutBoxTo<'a, T> {
    fn to(&'a mut self) -> T;
}
 
 
impl<'a, T> HerculesMutBoxTo<'a, HerculesCPURefMut<'a>> for HerculesMutBox<T>
 
where T: Default + Clone
 
{
 
fn to(&'a mut self) -> HerculesCPURefMut<'a> {
 
self.as_cpu_ref()
 
}
 
}
 
 
/// Mutably borrow the box's contents on the CUDA device, moving the data to
/// the device if necessary.
#[cfg(feature = "cuda")]
impl<'a, T> HerculesMutBoxTo<'a, HerculesCUDARefMut<'a>> for HerculesMutBox<T>
where
    T: Default + Clone,
{
    fn to(&'a mut self) -> HerculesCUDARefMut<'a> {
        HerculesMutBox::as_cuda_ref(self)
    }
}
Loading