diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index 445647efc5ada88879fc61718d4761c684c25db1..3b35f73ed370984753fa8e416f3b26c24951283f 100644 --- a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -324,7 +324,7 @@ impl<'a> RTContext<'a> { Node::Constant { id: cons_id } => { let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap(); write!(block, " {} = ", self.get_value(id))?; - let mut size = None; + let mut size_and_device = None; match self.module.constants[cons_id.idx()] { Constant::Boolean(val) => write!(block, "{}bool", val)?, Constant::Integer8(val) => write!(block, "{}i8", val)?, @@ -351,14 +351,15 @@ impl<'a> RTContext<'a> { write!(block, "backing_{}.byte_add(", device.name())?; self.codegen_dynamic_constant(offset, block)?; write!(block, " as usize)")?; - size = Some(self.codegen_type_size(ty)); + size_and_device = Some((self.codegen_type_size(ty), device)); } } write!(block, ";\n")?; - if let Some(size) = size { + if let Some((size, device)) = size_and_device { write!( block, - " ::core::ptr::write_bytes({}, 0, {} as usize);\n", + " ::hercules_rt::__{}_zero_mem({}, {} as usize);\n", + device.name(), self.get_value(id), size )?; diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs index db2dee7740072c0ec5098d76a57765dfd287ae70..c0da1096cf65cb643c4ee21b2713e148cbb73ef1 100644 --- a/hercules_rt/src/lib.rs +++ b/hercules_rt/src/lib.rs @@ -1,6 +1,6 @@ use std::alloc::{alloc, dealloc, Layout}; use std::marker::PhantomData; -use std::ptr::{copy_nonoverlapping, NonNull}; +use std::ptr::{copy_nonoverlapping, write_bytes, NonNull}; use std::slice::{from_raw_parts, from_raw_parts_mut}; /* @@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) { dealloc(ptr, Layout::from_size_align(size, 16).unwrap()) } +pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) { + write_bytes(ptr, 0, size); +} + pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { copy_nonoverlapping(src, dst, size); } @@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { extern "C" { pub fn __cuda_alloc(size: usize) -> *mut u8; pub fn __cuda_dealloc(ptr: *mut u8, size: usize); + pub fn __cuda_zero_mem(ptr: *mut u8, size: usize); pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize); diff --git a/hercules_rt/src/rtdefs.cu b/hercules_rt/src/rtdefs.cu index ab67ec98654a57f2ea0cfe2c932fad0c77d445fa..24bbf2d572e00f99216da9be878d68e86406ba1d 100644 --- a/hercules_rt/src/rtdefs.cu +++ b/hercules_rt/src/rtdefs.cu @@ -12,6 +12,10 @@ extern "C" { (void) size; cudaFree(ptr); } + + void __cuda_zero_mem(void *ptr, size_t size) { + cudaMemset(ptr, 0, size); + } void __copy_cpu_to_cuda(void *dst, void *src, size_t size) { cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);