Skip to content
Snippets Groups Projects
Commit ec2f22f9 authored by Russel Arbore's avatar Russel Arbore
Browse files

Use device-specific function in zeroing out memories

parent 53c5c99e
No related branches found
No related tags found
1 merge request!131Use device-specific function in zeroing out memories
Pipeline #201212 passed
...@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> { ...@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> {
Node::Constant { id: cons_id } => { Node::Constant { id: cons_id } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap(); let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
write!(block, " {} = ", self.get_value(id))?; write!(block, " {} = ", self.get_value(id))?;
let mut size = None; let mut size_and_device = None;
match self.module.constants[cons_id.idx()] { match self.module.constants[cons_id.idx()] {
Constant::Boolean(val) => write!(block, "{}bool", val)?, Constant::Boolean(val) => write!(block, "{}bool", val)?,
Constant::Integer8(val) => write!(block, "{}i8", val)?, Constant::Integer8(val) => write!(block, "{}i8", val)?,
...@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> { ...@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> {
write!(block, "backing_{}.byte_add(", device.name())?; write!(block, "backing_{}.byte_add(", device.name())?;
self.codegen_dynamic_constant(offset, block)?; self.codegen_dynamic_constant(offset, block)?;
write!(block, " as usize)")?; write!(block, " as usize)")?;
size = Some(self.codegen_type_size(ty)); size_and_device = Some((self.codegen_type_size(ty), device));
} }
} }
write!(block, ";\n")?; write!(block, ";\n")?;
if let Some(size) = size { if let Some((size, device)) = size_and_device {
write!( write!(
block, block,
" ::core::ptr::write_bytes({}, 0, {} as usize);\n", " ::hercules_rt::__{}_zero_mem({}, {} as usize);\n",
device.name(),
self.get_value(id), self.get_value(id),
size size
)?; )?;
......
use std::alloc::{alloc, dealloc, Layout}; use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, NonNull}; use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::slice::{from_raw_parts, from_raw_parts_mut};
/* /*
...@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) { ...@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
dealloc(ptr, Layout::from_size_align(size, 16).unwrap()) dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
} }
pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
write_bytes(ptr, 0, size);
}
pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
copy_nonoverlapping(src, dst, size); copy_nonoverlapping(src, dst, size);
} }
...@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { ...@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
extern "C" { extern "C" {
pub fn __cuda_alloc(size: usize) -> *mut u8; pub fn __cuda_alloc(size: usize) -> *mut u8;
pub fn __cuda_dealloc(ptr: *mut u8, size: usize); pub fn __cuda_dealloc(ptr: *mut u8, size: usize);
pub fn __cuda_zero_mem(ptr: *mut u8, size: usize);
pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
......
...@@ -12,6 +12,10 @@ extern "C" { ...@@ -12,6 +12,10 @@ extern "C" {
(void) size; (void) size;
cudaFree(ptr); cudaFree(ptr);
} }
void __cuda_zero_mem(void *ptr, size_t size) {
cudaMemset(ptr, 0, size);
}
void __copy_cpu_to_cuda(void *dst, void *src, size_t size) { void __copy_cpu_to_cuda(void *dst, void *src, size_t size) {
cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment