diff --git a/Cargo.lock b/Cargo.lock
index 1973fbbeb88622cb01fe3ddfc191aae796a00f54..41ca98b7ba16abf993f42f8b7edfe4d2b7e234cf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1065,6 +1065,9 @@ dependencies = [
 [[package]]
 name = "hercules_rt"
 version = "0.1.0"
+dependencies = [
+ "aligned-vec",
+]
 
 [[package]]
 name = "hercules_tests"
diff --git a/hercules_rt/Cargo.toml b/hercules_rt/Cargo.toml
index 46886b12520ea94fe2d661d39698ba4bc6e401fd..61a6188a73b589d8a2ebf63062044b005442ee56 100644
--- a/hercules_rt/Cargo.toml
+++ b/hercules_rt/Cargo.toml
@@ -9,4 +9,4 @@ cuda = []
 debug = []
 
 [dependencies]
-
+aligned-vec = "*"
\ No newline at end of file
diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs
index 3b79dc4879153b69706b795dc21ad91d47bb0488..5e8c031a215729d6fe3bc9954c93ec7d964bb6dc 100644
--- a/hercules_rt/src/lib.rs
+++ b/hercules_rt/src/lib.rs
@@ -4,9 +4,10 @@ use std::alloc::{alloc, dealloc, Layout};
 use std::marker::PhantomData;
 use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
 use std::slice::{from_raw_parts, from_raw_parts_mut};
-
 use std::sync::OnceLock;
 
+use aligned_vec::AVec;
+
 /*
  * Define supporting types, functions, and macros for Hercules RT functions. For
  * a more in-depth discussion of the design of these utilities, see hercules_cg/
@@ -463,7 +464,7 @@ unsafe impl Sync for __RawPtrSendSync {}
  */
 pub struct HerculesImmBox<'a, T> {
     #[allow(dead_code)]
-    cpu_alloc: OnceLock<Vec<T>>,
+    cpu_alloc: OnceLock<AVec<T>>,
     #[cfg(feature = "cuda")]
     cuda_alloc: OnceLock<CUDABox>,
 
@@ -472,16 +473,43 @@ pub struct HerculesImmBox<'a, T> {
     cuda_ref: OnceLock<HerculesCUDARef<'a>>,
 }
 
-impl<'a, T> From<&'a [T]> for HerculesImmBox<'a, T> {
+impl<'a, T: Clone> From<&'a [T]> for HerculesImmBox<'a, T> {
+    /// Wraps `value` for CPU use. A slice whose address is already a multiple
+    /// of 32 bytes is referenced in place; any other slice is cloned into an
+    /// aligned allocation owned by the box (hence the `T: Clone` bound).
     fn from(value: &'a [T]) -> Self {
+        // Alignment, in bytes, that the CPU-side buffer must satisfy.
+        const ALIGN: usize = 32;
+
+        // Test the address as an integer: equivalent to the nightly-only
+        // `pointer::is_aligned_to`, but available on stable Rust.
+        let (cpu_alloc, cpu_ref) = if value.as_ptr() as usize % ALIGN == 0 {
+            // Already aligned: reference the caller's slice and own nothing.
+            (OnceLock::new(), HerculesCPURef::from_slice(value))
+        } else {
+            // Misaligned: clone the elements into an aligned allocation owned by
+            // this box and point the CPU reference at that copy instead.
+            let aligned = AVec::from_slice(ALIGN, value);
+            let size = std::mem::size_of_val(value);
+            // SAFETY: `aligned` holds a clone of every element of `value`, so its
+            // buffer spans `size` bytes. Moving `aligned` into `cpu_alloc` moves
+            // only the handle, not the heap buffer, and `cpu_alloc` is stored in
+            // the same box as `cpu_ref`.
+            // NOTE(review): assumes `__from_parts` only requires the pointer to be
+            // valid for `size` bytes while the reference is in use - confirm.
+            let cpu_ref =
+                unsafe { HerculesCPURef::__from_parts(aligned.as_ptr() as *mut u8, size) };
+            (OnceLock::from(aligned), cpu_ref)
+        };
+
         HerculesImmBox {
-            cpu_alloc: OnceLock::new(),
+            cpu_alloc,
             #[cfg(feature = "cuda")]
             cuda_alloc: OnceLock::new(),
 
-            cpu_ref: OnceLock::from(HerculesCPURef::from_slice(value)),
+            cpu_ref: OnceLock::from(cpu_ref),
             #[cfg(feature = "cuda")]
             cuda_ref: OnceLock::new(),
         }
     }
 }