Skip to content
Snippets Groups Projects
Commit 7e45a3c3 authored by prrathi's avatar prrathi
Browse files

merged

parents a4ded058 3b491c96
No related branches found
No related tags found
1 merge request!115GPU backend
......@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> {
Node::Constant { id: cons_id } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
write!(block, " {} = ", self.get_value(id))?;
let mut size = None;
let mut size_and_device = None;
match self.module.constants[cons_id.idx()] {
Constant::Boolean(val) => write!(block, "{}bool", val)?,
Constant::Integer8(val) => write!(block, "{}i8", val)?,
......@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> {
write!(block, "backing_{}.byte_add(", device.name())?;
self.codegen_dynamic_constant(offset, block)?;
write!(block, " as usize)")?;
size = Some(self.codegen_type_size(ty));
size_and_device = Some((self.codegen_type_size(ty), device));
}
}
write!(block, ";\n")?;
if let Some(size) = size {
if let Some((size, device)) = size_and_device {
write!(
block,
" ::core::ptr::write_bytes({}, 0, {} as usize);\n",
" ::hercules_rt::__{}_zero_mem({}, {} as usize);\n",
device.name(),
self.get_value(id),
size
)?;
......
use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, NonNull};
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
/*
......@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
}
pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
write_bytes(ptr, 0, size);
}
pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
copy_nonoverlapping(src, dst, size);
}
......@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
extern "C" {
pub fn __cuda_alloc(size: usize) -> *mut u8;
pub fn __cuda_dealloc(ptr: *mut u8, size: usize);
pub fn __cuda_zero_mem(ptr: *mut u8, size: usize);
pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
......
......@@ -13,6 +13,10 @@ extern "C" {
cudaFree(ptr);
}
void __cuda_zero_mem(void *ptr, size_t size) {
cudaMemset(ptr, 0, size);
}
void __copy_cpu_to_cuda(void *dst, void *src, size_t size) {
cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment