#![feature(if_let_guard, let_chains)]
pub mod cpu;
pub mod gpu;
pub mod rt;
pub mod fork_tree;
pub use crate::cpu::*;
pub use crate::gpu::*;
pub use crate::rt::*;
pub use crate::fork_tree::*;
use std::collections::BTreeMap;
use hercules_ir::*;
/*
 * The alignment, in bytes, that `get_type_alignment` assigns to array types.
 * No other alignment computed by that function exceeds this value.
 */
pub const LARGEST_ALIGNMENT: usize = 64;
/*
 * Compute the alignment, in bytes, of the type `ty` looked up in `types`. The
 * alignment of a type does not depend on dynamic constants.
 *
 * - Primitive types are aligned to 1, 2, 4, or 8 bytes according to their
 *   width.
 * - Products and summations take the largest alignment among their members,
 *   or 1 if they have no members.
 * - Arrays always get LARGEST_ALIGNMENT.
 *
 * Panics if `ty` is a control or multi-return type. Indexing `types` with
 * `ty.idx()` also panics if that index is out of range.
 */
pub fn get_type_alignment(types: &[Type], ty: TypeID) -> usize {
    match &types[ty.idx()] {
        Type::Control | Type::MultiReturn(_) => {
            panic!("PANIC: get_type_alignment called on a control or multi-return type.")
        }
        Type::Boolean | Type::Integer8 | Type::UnsignedInteger8 | Type::Float8 => 1,
        Type::Integer16 | Type::UnsignedInteger16 | Type::BFloat16 => 2,
        Type::Integer32 | Type::UnsignedInteger32 | Type::Float32 => 4,
        Type::Integer64 | Type::UnsignedInteger64 | Type::Float64 => 8,
        // An aggregate is as strictly aligned as its most strictly aligned
        // member; an empty aggregate falls back to byte alignment.
        Type::Product(members) | Type::Summation(members) => members
            .iter()
            .map(|&member| get_type_alignment(types, member))
            .max()
            .unwrap_or(1),
        // Use a large alignment for arrays to generate better vector code.
        Type::Array(_, _) => LARGEST_ALIGNMENT,
    }
}
/*
 * Nodes producing collection values are "colored" with what device their
 * underlying memory lives on. Also explicitly store the device of the
 * parameters and return of each function.
 *
 * The tuple holds a map from node ID to device, followed by two vectors of
 * optional devices. The two vectors presumably hold the parameter devices and
 * the return devices, in that order - TODO confirm against the code that
 * builds this tuple.
 */
pub type FunctionNodeColors = (
BTreeMap<NodeID, Device>,
Vec<Option<Device>>,
Vec<Option<Device>>,
);
/*
 * Maps a function ID to that function's `FunctionNodeColors`.
 */
pub type NodeColors = BTreeMap<FunctionID, FunctionNodeColors>;
/*
 * The allocation information of each function is a size of the backing memory
 * needed and offsets into that backing memory per constant object and call node
 * in the function (as well as their individual sizes). This information is
 * kept separately per device, and every size and offset is a dynamic constant.
 *
 * For each device, the first tuple element is presumably the total size and
 * each per-node pair is presumably (offset, size) - TODO confirm the order
 * against the code that fills this map.
 */
pub type FunctionBackingAllocation = BTreeMap<
Device,
(
DynamicConstantID,
BTreeMap<NodeID, (DynamicConstantID, DynamicConstantID)>,
),
>;
/*
 * Maps a function ID to that function's `FunctionBackingAllocation`.
 */
pub type BackingAllocations = BTreeMap<FunctionID, FunctionBackingAllocation>;
/*
 * The two devices that `backing_device` can return. The name suggests these
 * are the devices that get their own backing memory - TODO confirm.
 */
pub const BACKED_DEVICES: [Device; 2] = [Device::LLVM, Device::CUDA];
/*
 * Return the device that `device` maps to: LLVM and CUDA map to themselves,
 * and AsyncRust maps to LLVM.
 */
pub fn backing_device(device: Device) -> Device {
    match device {
        // Memory loads and stores in AsyncRust code execute on the CPU, so
        // AsyncRust is grouped with LLVM.
        Device::LLVM | Device::AsyncRust => Device::LLVM,
        Device::CUDA => Device::CUDA,
    }
}