Skip to content
Snippets Groups Projects
Commit cb7cbc82 authored by Xavier Routh's avatar Xavier Routh
Browse files

Merge branch 'main' into fork-opt

parents 1402f583 34f9cfcf
No related branches found
No related tags found
1 merge request!103Forkify + Loop Canonicalization + Initial Fork Fission
......@@ -324,7 +324,7 @@ impl<'a> RTContext<'a> {
Node::Constant { id: cons_id } => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
write!(block, " {} = ", self.get_value(id))?;
let mut size = None;
let mut size_and_device = None;
match self.module.constants[cons_id.idx()] {
Constant::Boolean(val) => write!(block, "{}bool", val)?,
Constant::Integer8(val) => write!(block, "{}i8", val)?,
......@@ -351,14 +351,15 @@ impl<'a> RTContext<'a> {
write!(block, "backing_{}.byte_add(", device.name())?;
self.codegen_dynamic_constant(offset, block)?;
write!(block, " as usize)")?;
size = Some(self.codegen_type_size(ty));
size_and_device = Some((self.codegen_type_size(ty), device));
}
}
write!(block, ";\n")?;
if let Some(size) = size {
if let Some((size, device)) = size_and_device {
write!(
block,
" ::core::ptr::write_bytes({}, 0, {} as usize);\n",
" ::hercules_rt::__{}_zero_mem({}, {} as usize);\n",
device.name(),
self.get_value(id),
size
)?;
......
......@@ -248,6 +248,14 @@ pub fn reduce_cycles(
for reduce in reduces {
let (join, _, reduct) = function.nodes[reduce.idx()].try_reduce().unwrap();
let fork = join_fork_map[&join];
let isnt_outside_fork_join = |id: NodeID| {
let node = &function.nodes[id.idx()];
node.try_phi()
.map(|(control, _)| control)
.or(node.try_reduce().map(|(control, _, _)| control))
.map(|control| fork_join_nest[&fork].contains(&control))
.unwrap_or(true)
};
// First, find all data nodes that are used by the `reduct` input of the
// reduce, including the `reduct` itself.
......@@ -258,10 +266,7 @@ pub fn reduce_cycles(
for u in get_uses(&function.nodes[item.idx()]).as_ref() {
if !function.nodes[u.idx()].is_control()
&& !use_reachable.contains(u)
&& function.nodes[u.idx()]
.try_phi()
.map(|(control, _)| fork_join_nest[&fork].contains(&control))
.unwrap_or(true)
&& isnt_outside_fork_join(*u)
{
use_reachable.insert(*u);
worklist.push(*u);
......@@ -274,7 +279,10 @@ pub fn reduce_cycles(
let mut worklist = vec![reduce];
while let Some(item) = worklist.pop() {
for u in def_use.get_users(item) {
if !function.nodes[u.idx()].is_control() && !user_reachable.contains(u) {
if !function.nodes[u.idx()].is_control()
&& !user_reachable.contains(u)
&& isnt_outside_fork_join(*u)
{
user_reachable.insert(*u);
worklist.push(*u);
}
......
use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::ptr::{copy_nonoverlapping, NonNull};
use std::ptr::{copy_nonoverlapping, write_bytes, NonNull};
use std::slice::{from_raw_parts, from_raw_parts_mut};
/*
......@@ -17,6 +17,10 @@ pub unsafe fn __cpu_dealloc(ptr: *mut u8, size: usize) {
dealloc(ptr, Layout::from_size_align(size, 16).unwrap())
}
pub unsafe fn __cpu_zero_mem(ptr: *mut u8, size: usize) {
write_bytes(ptr, 0, size);
}
pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
copy_nonoverlapping(src, dst, size);
}
......@@ -25,6 +29,7 @@ pub unsafe fn __copy_cpu_to_cpu(dst: *mut u8, src: *mut u8, size: usize) {
extern "C" {
pub fn __cuda_alloc(size: usize) -> *mut u8;
pub fn __cuda_dealloc(ptr: *mut u8, size: usize);
pub fn __cuda_zero_mem(ptr: *mut u8, size: usize);
pub fn __copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
pub fn __copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
......
......@@ -12,6 +12,10 @@ extern "C" {
(void) size;
cudaFree(ptr);
}
void __cuda_zero_mem(void *ptr, size_t size) {
cudaMemset(ptr, 0, size);
}
void __copy_cpu_to_cuda(void *dst, void *src, size_t size) {
cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment