#![feature(if_let_guard, let_chains)] pub mod cpu; pub mod gpu; pub mod rt; pub mod fork_tree; pub use crate::cpu::*; pub use crate::gpu::*; pub use crate::rt::*; pub use crate::fork_tree::*; use std::collections::BTreeMap; use hercules_ir::*; pub const LARGEST_ALIGNMENT: usize = 64; /* * The alignment of a type does not depend on dynamic constants. */ pub fn get_type_alignment(types: &Vec<Type>, ty: TypeID) -> usize { match types[ty.idx()] { Type::Control | Type::MultiReturn(_) => panic!(), Type::Boolean | Type::Integer8 | Type::UnsignedInteger8 | Type::Float8 => 1, Type::Integer16 | Type::UnsignedInteger16 | Type::BFloat16 => 2, Type::Integer32 | Type::UnsignedInteger32 | Type::Float32 => 4, Type::Integer64 | Type::UnsignedInteger64 | Type::Float64 => 8, Type::Product(ref members) | Type::Summation(ref members) => members .into_iter() .map(|id| get_type_alignment(types, *id)) .max() .unwrap_or(1), // Use a large alignment for arrays to generate better vector code. Type::Array(_, _) => LARGEST_ALIGNMENT, } } /* * Nodes producing collection values are "colored" with what device their * underlying memory lives on. Also explicitly store the device of the * parameters and return of each function. */ pub type FunctionNodeColors = ( BTreeMap<NodeID, Device>, Vec<Option<Device>>, Vec<Option<Device>>, ); pub type NodeColors = BTreeMap<FunctionID, FunctionNodeColors>; /* * The allocation information of each function is a size of the backing memory * needed and offsets into that backing memory per constant object and call node * in the function (as well as their individual sizes). */ pub type FunctionBackingAllocation = BTreeMap< Device, ( DynamicConstantID, BTreeMap<NodeID, (DynamicConstantID, DynamicConstantID)>, ), >; pub type BackingAllocations = BTreeMap<FunctionID, FunctionBackingAllocation>; pub const BACKED_DEVICES: [Device; 2] = [Device::LLVM, Device::CUDA]; pub fn backing_device(device: Device) -> Device { match device { Device::LLVM => Device::LLVM, Device::CUDA => Device::CUDA, // Memory loads and stores in AsyncRust code execute on the CPU. Device::AsyncRust => Device::LLVM, } }