Skip to content
Snippets Groups Projects
Commit 004eb2c6 authored by Russel Arbore
Browse files

Auto align in hercules box

parent f7a35a6d
No related branches found
No related tags found
1 merge request: !190 Set up cava benchmark
Pipeline #201764 failed
This commit is part of merge request !190. Comments created here will be created in the context of that merge request.
......@@ -1065,6 +1065,9 @@ dependencies = [
[[package]]
name = "hercules_rt"
version = "0.1.0"
dependencies = [
"aligned-vec",
]
[[package]]
name = "hercules_tests"
......
......@@ -9,4 +9,4 @@ cuda = []
debug = []
[dependencies]
aligned-vec = "*"
\ No newline at end of file
......@@ -4,9 +4,10 @@ use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::OnceLock;
use aligned_vec::AVec;
/*
* Define supporting types, functions, and macros for Hercules RT functions. For
* a more in-depth discussion of the design of these utilities, see hercules_cg/
......@@ -463,7 +464,7 @@ unsafe impl Sync for __RawPtrSendSync {}
*/
pub struct HerculesImmBox<'a, T> {
#[allow(dead_code)]
cpu_alloc: OnceLock<Vec<T>>,
cpu_alloc: OnceLock<AVec<T>>,
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock<CUDABox>,
......@@ -472,16 +473,32 @@ pub struct HerculesImmBox<'a, T> {
cuda_ref: OnceLock<HerculesCUDARef<'a>>,
}
impl<'a, T> From<&'a [T]> for HerculesImmBox<'a, T> {
impl<'a, T: Clone> From<&'a [T]> for HerculesImmBox<'a, T> {
fn from(value: &'a [T]) -> Self {
HerculesImmBox {
cpu_alloc: OnceLock::new(),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(HerculesCPURef::from_slice(value)),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
if value.as_ptr().is_aligned_to(32) {
HerculesImmBox {
cpu_alloc: OnceLock::new(),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(HerculesCPURef::from_slice(value)),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
}
} else {
let cpu_alloc = AVec::from_slice(32, value);
let size = value.len() * size_of::<T>();
let cpu_ref =
unsafe { HerculesCPURef::__from_parts(cpu_alloc.as_ptr() as *mut u8, size) };
HerculesImmBox {
cpu_alloc: OnceLock::from(cpu_alloc),
#[cfg(feature = "cuda")]
cuda_alloc: OnceLock::new(),
cpu_ref: OnceLock::from(cpu_ref),
#[cfg(feature = "cuda")]
cuda_ref: OnceLock::new(),
}
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment