Skip to content
Snippets Groups Projects
Commit cb7cbc82 authored by Xavier Routh's avatar Xavier Routh
Browse files

Merge branch 'main' into fork-opt

parents 1402f583 34f9cfcf
No related branches found
No related tags found
1 merge request!103Forkify + Loop Canonicalization + Initial Fork Fission
...@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> { ...@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> {
Node::Constant { id: cons_id } => { Node::Constant { id: cons_id } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap(); let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
write!(block, " {} = ", self.get_value(id))?; write!(block, " {} = ", self.get_value(id))?;
let mut size = None; let mut size_and_device = None;
match self.module.constants[cons_id.idx()] { match self.module.constants[cons_id.idx()] {
Constant::Boolean(val) => write!(block, "{}bool", val)?, Constant::Boolean(val) => write!(block, "{}bool", val)?,
Constant::Integer8(val) => write!(block, "{}i8", val)?, Constant::Integer8(val) => write!(block, "{}i8", val)?,
...@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> { ...@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> {
write!(block, "backing_{}.byte_add(", device.name())?; write!(block, "backing_{}.byte_add(", device.name())?;
self.codegen_dynamic_constant(offset, block)?; self.codegen_dynamic_constant(offset, block)?;
write!(block, " as usize)")?; write!(block, " as usize)")?;
size = Some(self.codegen_type_size(ty)); size_and_device = Some((self.codegen_type_size(ty), device));
} }
} }
write!(block, ";\n")?; write!(block, ";\n")?;
if let Some(size) = size { if let Some((size, device)) = size_and_device {
write!( write!(
block, block,
" ::core::ptr::write_bytes({}, 0, {} as usize);\n", " ::hercules_rt::__{}_zero_mem({}, {} as usize);\n",
device.name(),
self.get_value(id), self.get_value(id),
size size
)?; )?;
......
...@@ -248,6 +248,14 @@ pub fn reduce_cycles( ...@@ -248,6 +248,14 @@ pub fn reduce_cycles(
for reduce in reduces { for reduce in reduces {
let (join, _, reduct) = function.nodes[reduce.idx()].try_reduce().unwrap(); let (join, _, reduct) = function.nodes[reduce.idx()].try_reduce().unwrap();
let fork = join_fork_map[&join]; let fork = join_fork_map[&join];
let isnt_outside_fork_join = |id: NodeID| {
let node = &function.nodes[id.idx()];
node.try_phi()
.map(|(control, _)| control)
.or(node.try_reduce().map(|(control, _, _)| control))
.map(|control| fork_join_nest[&fork].contains(&control))
.unwrap_or(true)
};
// First, find all data nodes that are used by the `reduct` input of the // First, find all data nodes that are used by the `reduct` input of the
// reduce, including the `reduct` itself. // reduce, including the `reduct` itself.
...@@ -258,10 +266,7 @@ pub fn reduce_cycles( ...@@ -258,10 +266,7 @@ pub fn reduce_cycles(
for u in get_uses(&function.nodes[item.idx()]).as_ref() { for u in get_uses(&function.nodes[item.idx()]).as_ref() {
if !function.nodes[u.idx()].is_control() if !function.nodes[u.idx()].is_control()
&& !use_reachable.contains(u) && !use_reachable.contains(u)
&& function.nodes[u.idx()] && isnt_outside_fork_join(*u)
.try_phi()
.map(|(control, _)| fork_join_nest[&fork].contains(&control))
.unwrap_or(true)
{ {
use_reachable.insert(*u); use_reachable.insert(*u);
worklist.push(*u); worklist.push(*u);
...@@ -274,7 +279,10 @@ pub fn reduce_cycles( ...@@ -274,7 +279,10 @@ pub fn reduce_cycles(
let mut worklist = vec![reduce]; let mut worklist = vec![reduce];
while let Some(item) = worklist.pop() { while let Some(item) = worklist.pop() {
for u in def_use.get_users(item) { for u in def_use.get_users(item) {
if !function.nodes[u.idx()].is_control() && !user_reachable.contains(u) { if !function.nodes[u.idx()].is_control()
&& !user_reachable.contains(u)
&& isnt_outside_fork_join(*u)
{
user_reachable.insert(*u); user_reachable.insert(*u);
worklist.push(*u); worklist.push(*u);
} }
......
use std::alloc::{alloc, dealloc, Layout}; use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, NonNull}; use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::slice::{from_raw_parts, from_raw_parts_mut};
/* /*
...@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) { ...@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
dealloc(ptr, Layout::from_size_align(size, 16).unwrap()) dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
} }
pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
write_bytes(ptr, 0, size);
}
pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
copy_nonoverlapping(src, dst, size); copy_nonoverlapping(src, dst, size);
} }
...@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) { ...@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
extern "C" { extern "C" {
pub fn __cuda_alloc(size: usize) -> *mut u8; pub fn __cuda_alloc(size: usize) -> *mut u8;
pub fn __cuda_dealloc(ptr: *mut u8, size: usize); pub fn __cuda_dealloc(ptr: *mut u8, size: usize);
pub fn __cuda_zero_mem(ptr: *mut u8, size: usize);
pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize); pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
......
...@@ -12,6 +12,10 @@ extern "C" { ...@@ -12,6 +12,10 @@ extern "C" {
(void) size; (void) size;
cudaFree(ptr); cudaFree(ptr);
} }
void __cuda_zero_mem(void *ptr, size_t size) {
cudaMemset(ptr, 0, size);
}
void __copy_cpu_to_cuda(void *dst, void *src, size_t size) { void __copy_cpu_to_cuda(void *dst, void *src, size_t size) {
cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment