diff --git a/hercules_cg/src/cpu.rs b/hercules_cg/src/cpu.rs index b15cf30106c76640016b1957fe9906ac01c74858..37bf814d85569a5aa8dd827e2fce17894da5a397 100644 --- a/hercules_cg/src/cpu.rs +++ b/hercules_cg/src/cpu.rs @@ -16,6 +16,7 @@ static NUM_FILLER_REGS: AtomicUsize = AtomicUsize::new(0); * LLVM bindings for Rust, and we are *not* writing any C++. */ pub fn cpu_codegen<W: Write>( + module_name: &str, function: &Function, types: &Vec<Type>, constants: &Vec<Constant>, @@ -27,6 +28,7 @@ pub fn cpu_codegen<W: Write>( w: &mut W, ) -> Result<(), Error> { let ctx = CPUContext { + module_name, function, types, constants, @@ -40,6 +42,7 @@ pub fn cpu_codegen<W: Write>( } struct CPUContext<'a> { + module_name: &'a str, function: &'a Function, types: &'a Vec<Type>, constants: &'a Vec<Constant>, @@ -65,16 +68,18 @@ impl<'a> CPUContext<'a> { if self.types[return_type.idx()].is_primitive() { write!( w, - "define dso_local {} @{}(", + "define dso_local {} @{}_{}(", self.get_type(return_type), - self.function.name + self.module_name, + self.function.name, )?; } else { write!( w, - "define dso_local nonnull noundef {} @{}(", + "define dso_local nonnull noundef {} @{}_{}(", self.get_type(return_type), - self.function.name + self.module_name, + self.function.name, )?; } } else { @@ -89,7 +94,11 @@ impl<'a> CPUContext<'a> { .collect::<Vec<_>>() .join(", "), )?; - write!(w, "define dso_local void @{}(", self.function.name,)?; + write!( + w, + "define dso_local void @{}_{}(", + self.module_name, self.function.name, + )?; } let mut first_param = true; // The first parameter is a pointer to CPU backing memory, if it's diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index a3eea2745ea6dde2929b9cd6fcc17f5c6483643f..1ba8302ab84c96d818e8fdbfecea10576ed946a5 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -14,6 +14,7 @@ use crate::*; * of similarities with the CPU LLVM generation plus custom GPU parallelization. */ pub fn gpu_codegen<W: Write>( + module_name: &str, function: &Function, types: &Vec<Type>, constants: &Vec<Constant>, @@ -170,6 +171,7 @@ pub fn gpu_codegen<W: Write>( }; let ctx = GPUContext { + module_name, function, types, constants, @@ -199,6 +201,7 @@ struct GPUKernelParams { } struct GPUContext<'a> { + module_name: &'a str, function: &'a Function, types: &'a Vec<Type>, constants: &'a Vec<Constant>, @@ -395,8 +398,8 @@ namespace cg = cooperative_groups; fn codegen_kernel_begin<W: Write>(&self, w: &mut W) -> Result<(), Error> { write!( w, - "__global__ void __launch_bounds__({}) {}_gpu(", - self.kernel_params.max_num_threads, self.function.name + "__global__ void __launch_bounds__({}) {}_{}_gpu(", + self.kernel_params.max_num_threads, self.module_name, self.function.name )?; let mut first_param = true; // The first parameter is a pointer to GPU backing memory, if it's @@ -645,7 +648,7 @@ namespace cg = cooperative_groups; } else { write!(w, "{}", self.get_type(self.function.return_types[0], false))?; } - write!(w, " {}(", self.function.name)?; + write!(w, " {}_{}(", self.module_name, self.function.name)?; let mut first_param = true; // The first parameter is a pointer to GPU backing memory, if it's diff --git a/hercules_cg/src/rt.rs b/hercules_cg/src/rt.rs index 8fa0c09ee512e3f2e43c5280bc3cb6947bc31dc5..6981a3da7e59176f73d6fecdde07fe636cc6aecf 100644 --- a/hercules_cg/src/rt.rs +++ b/hercules_cg/src/rt.rs @@ -74,6 +74,7 @@ use crate::*; * set some CUDA memory - the user can then take a CUDA reference to that box. */ pub fn rt_codegen<W: Write>( + module_name: &str, func_id: FunctionID, module: &Module, def_use: &ImmutableDefUseMap, @@ -96,6 +97,7 @@ pub fn rt_codegen<W: Write>( .map(|(fork, join)| (*join, *fork)) .collect(); let ctx = RTContext { + module_name, func_id, module, def_use, @@ -117,6 +119,7 @@ pub fn rt_codegen<W: Write>( } struct RTContext<'a> { + module_name: &'a str, func_id: FunctionID, module: &'a Module, def_use: &'a ImmutableDefUseMap, @@ -157,7 +160,8 @@ impl<'a> RTContext<'a> { // Dump the function signature. write!( w, - "#[allow(unused_assignments,unused_variables,unused_mut,unused_parens,unused_unsafe,non_snake_case)]async unsafe fn {}(", + "#[allow(unused_assignments,unused_variables,unused_mut,unused_parens,unused_unsafe,non_snake_case)]async unsafe fn {}_{}(", + self.module_name, func.name )?; let mut first_param = true; @@ -236,7 +240,7 @@ impl<'a> RTContext<'a> { // Create the return struct write!(w, "let mut ret_struct: ::std::mem::MaybeUninit<ReturnStruct> = ::std::mem::MaybeUninit::uninit();")?; // Call the device function - write!(w, "{}(", callee.name)?; + write!(w, "{}_{}(", self.module_name, callee.name)?; if self.backing_allocations[&callee_id].contains_key(&self.devices[callee_id.idx()]) { write!(w, "backing, ")?; @@ -672,8 +676,9 @@ impl<'a> RTContext<'a> { }; write!( block, - "{}{}(", + "{}{}_{}(", prefix, + self.module_name, self.module.functions[callee_id.idx()].name )?; for (device, (offset, size)) in self.backing_allocations[&self.func_id] @@ -1463,7 +1468,7 @@ impl<'a> RTContext<'a> { } // Call the wrapped function. - write!(w, "let ret = {}(", func.name)?; + write!(w, "let ret = {}_{}(", self.module_name, func.name)?; for (device, _) in self.backing_allocations[&self.func_id].iter() { write!( w, @@ -1630,8 +1635,9 @@ impl<'a> RTContext<'a> { let func = &self.module.functions[func_id.idx()]; write!( w, - "{}fn {}(", + "{}fn {}_{}(", if is_unsafe { "unsafe " } else { "" }, + self.module_name, func.name )?; let mut first_param = true; @@ -1667,7 +1673,7 @@ impl<'a> RTContext<'a> { func_id: FunctionID, ) -> Result<(), Error> { let func = &self.module.functions[func_id.idx()]; - write!(w, "fn {}(", func.name)?; + write!(w, "fn {}_{}(", self.module_name, func.name)?; let mut first_param = true; if self.backing_allocations[&func_id].contains_key(&self.devices[func_id.idx()]) { first_param = false; diff --git a/juno_samples/rodinia/cfd/src/cpu_euler.sch b/juno_samples/rodinia/cfd/src/cpu_euler.sch index 9cbdb942bb484342afe42a2fc711852878474508..5fe48a8395cfb6fada1d668b4f73fa6eb3487f5e 100644 --- a/juno_samples/rodinia/cfd/src/cpu_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_euler.sch @@ -1,23 +1,30 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(euler); -cpu(auto.euler); - -inline(auto.euler); -inline(auto.euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contribution, time_step); delete-uncalled(*); +simpl!(*); +ip-sroa[false](*); +sroa[false](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); -sroa[false](auto.euler); -dce(*); -float-collections(*); -dce(*); +unforkify(*); gcm(*); - diff --git a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch index 252015c368d594e843af34367450257d3459034f..6329c5046e15ca0bce646f4a778dcf8c8d781656 100644 --- a/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch +++ b/juno_samples/rodinia/cfd/src/cpu_pre_euler.sch @@ -1,23 +1,30 @@ -gvn(*); -dce(*); -phi-elim(*); -dce(*); -crc(*); -dce(*); -slf(*); -dce(*); +macro simpl!(X) { + ccp(X); + simplify-cfg(X); + lift-dc-math(X); + gvn(X); + phi-elim(X); + crc(X); + slf(X); + dce(X); + infer-schedules(X); +} -let auto = auto-outline(pre_euler); -cpu(auto.pre_euler); - -inline(auto.pre_euler); -inline(auto.pre_euler); +simpl!(*); +inline(compute_step_factor, compute_flux, compute_flux_contributions, compute_flux_contribution, time_step); delete-uncalled(*); +simpl!(*); +ip-sroa[false](*); +sroa[false](*); +predication(*); +const-inline(*); +simpl!(*); +fixpoint { + forkify(*); + fork-guard-elim(*); +} +simpl!(*); -sroa[false](auto.pre_euler); -dce(*); -float-collections(*); -dce(*); +unforkify(*); gcm(*); - diff --git a/juno_scheduler/src/pm.rs b/juno_scheduler/src/pm.rs index 3c14270057cb14559942499ff5f92df45cd7deab..e049f985e0db36ae78368b8d33c01d22744fdcc6 100644 --- a/juno_scheduler/src/pm.rs +++ b/juno_scheduler/src/pm.rs @@ -951,6 +951,7 @@ impl PassManager { for idx in 0..module.functions.len() { match devices[idx] { Device::LLVM => cpu_codegen( + &module_name, &module.functions[idx], &module.types, &module.constants, @@ -966,6 +967,7 @@ impl PassManager { error: format!("{}", e), })?, Device::CUDA => gpu_codegen( + &module_name, &module.functions[idx], &module.types, &module.constants, @@ -986,6 +988,7 @@ impl PassManager { error: format!("{}", e), })?, Device::AsyncRust => rt_codegen( + &module_name, FunctionID::new(idx), &module, &def_uses[idx],