diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs
index 445647efc5ada88879fc61718d4761c684c25db1..3b35f73ed370984753fa8e416f3b26c24951283f 100644
--- a/hercules_cg/src/rt.rs
+++ b/hercules_cg/src/rt.rs
@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> {
             Node::Constant { id: cons_id } => {
                 let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
                 write!(block, "                {} = ", self.get_value(id))?;
-                let mut size = None;
+                let mut size_and_device = None;
                 match self.module.constants[cons_id.idx()] {
                     Constant::Boolean(val) => write!(block, "{}bool", val)?,
                     Constant::Integer8(val) => write!(block, "{}i8", val)?,
@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> {
                         write!(block, "backing_{}.byte_add(", device.name())?;
                         self.codegen_dynamic_constant(offset, block)?;
                         write!(block, " as usize)")?;
-                        size = Some(self.codegen_type_size(ty));
+                        size_and_device = Some((self.codegen_type_size(ty), device));
                     }
                 }
                 write!(block, ";\n")?;
-                if let Some(size) = size {
+                if let Some((size, device)) = size_and_device {
                     write!(
                         block,
-                        "                ::core::ptr::write_bytes({}, 0, {} as usize);\n",
+                        "                ::hercules_rt::__{}_zero_mem({}, {} as usize);\n",
+                        device.name(),
                         self.get_value(id),
                         size
                     )?;
diff --git a/hercules_rt/src/lib.rs b/hercules_rt/src/lib.rs
index db2dee7740072c0ec5098d76a57765dfd287ae70..c0da1096cf65cb643c4ee21b2713e148cbb73ef1 100644
--- a/hercules_rt/src/lib.rs
+++ b/hercules_rt/src/lib.rs
@@ -1,6 +1,6 @@
 use std::alloc::{alloc, dealloc, Layout};
 use std::marker::PhantomData;
-use std::ptr::{copy_nonoverlapping, NonNull};
+use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
 use std::slice::{from_raw_parts, from_raw_parts_mut};
 
 /*
@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
     dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
 }
 
+pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
+    write_bytes(ptr, 0, size);
+}
+
 pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
     copy_nonoverlapping(src, dst, size);
 }
@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
 extern "C" {
     pub fn __cuda_alloc(size: usize) -> *mut u8;
     pub fn __cuda_dealloc(ptr: *mut u8, size: usize);
+    pub fn __cuda_zero_mem(ptr: *mut u8, size: usize);
     pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
     pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
     pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
diff --git a/hercules_rt/src/rtdefs.cu b/hercules_rt/src/rtdefs.cu
index ab67ec98654a57f2ea0cfe2c932fad0c77d445fa..24bbf2d572e00f99216da9be878d68e86406ba1d 100644
--- a/hercules_rt/src/rtdefs.cu
+++ b/hercules_rt/src/rtdefs.cu
@@ -12,6 +12,10 @@ extern "C" {
 		(void) size;
 		cudaFree(ptr);
 	}
+
+	void __cuda_zero_mem(void *ptr, size_t size) {
+		cudaMemset(ptr, 0, size);
+	}
 	
 	void __copy_cpu_to_cuda(void *dst, void *src, size_t size) {
 		cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);