Round 1 of rodinia schedule optimization

Merged: rarbore2 requested to merge rodinia_opt1 into main
1 file changed: +13, -5
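
The change threads a new module_name parameter through gpu_codegen and stores it in GPUContext, then folds it into every symbol the backend emits. As a minimal sketch of the effect on the generated CUDA, assuming a module named rodinia, a function named bfs, and a launch bound of 1024 (none of these names or values come from the diff):

// Before this change: the kernel is named after the source function alone.
__global__ void __launch_bounds__(1024) bfs_gpu(int *p) { p[threadIdx.x] = 0; }

// After this change: the module name is prepended, so kernels generated from
// different modules can presumably coexist in one binary without name clashes.
__global__ void __launch_bounds__(1024) rodinia_bfs_gpu(int *p) { p[threadIdx.x] = 0; }
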
@@ -14,6 +14,7 @@ use crate::*;
  * of similarities with the CPU LLVM generation plus custom GPU parallelization.
  */
 pub fn gpu_codegen<W: Write>(
+    module_name: &str,
     function: &Function,
     types: &Vec<Type>,
     constants: &Vec<Constant>,
@@ -170,6 +171,7 @@ pub fn gpu_codegen<W: Write>(
     };
     let ctx = GPUContext {
+        module_name,
         function,
         types,
         constants,
@@ -199,6 +201,7 @@ struct GPUKernelParams {
 }
 struct GPUContext<'a> {
+    module_name: &'a str,
     function: &'a Function,
     types: &'a Vec<Type>,
     constants: &'a Vec<Constant>,
@@ -395,8 +398,8 @@ namespace cg = cooperative_groups;
     fn codegen_kernel_begin<W: Write>(&self, w: &mut W) -> Result<(), Error> {
         write!(
             w,
-            "__global__ void __launch_bounds__({}) {}_gpu(",
-            self.kernel_params.max_num_threads, self.function.name
+            "__global__ void __launch_bounds__({}) {}_{}_gpu(",
+            self.kernel_params.max_num_threads, self.module_name, self.function.name
         )?;
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
@@ -645,7 +648,7 @@ namespace cg = cooperative_groups;
         } else {
             write!(w, "{}", self.get_type(self.function.return_types[0], false))?;
         }
-        write!(w, " {}(", self.function.name)?;
+        write!(w, " {}_{}(", self.module_name, self.function.name)?;
         let mut first_param = true;
         // The first parameter is a pointer to GPU backing memory, if it's
@@ -721,8 +724,13 @@ namespace cg = cooperative_groups;
         write!(w, "\tcudaError_t err;\n")?;
         write!(
             w,
-            "\t{}_gpu<<<{}, {}, {}>>>({});\n",
-            self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args
+            "\t{}_{}_gpu<<<{}, {}, {}>>>({});\n",
+            self.module_name,
+            self.function.name,
+            num_blocks,
+            num_threads,
+            dynamic_shared_offset,
+            pass_args
         )?;
         write!(w, "\terr = cudaGetLastError();\n")?;
         write!(
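
The last two hunks apply the same prefix to the emitted host-side wrapper and to the kernel launch inside it. A hedged sketch of what the updated format strings would produce, again with assumed names (rodinia, bfs), an assumed launch configuration, and simplified error handling:

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel emitted for function bfs in module rodinia.
__global__ void __launch_bounds__(1024) rodinia_bfs_gpu(int *data) {
    data[blockIdx.x * blockDim.x + threadIdx.x] = 0;
}

// Hypothetical host wrapper mirroring the " {}_{}(" and "{}_{}_gpu<<<...>>>"
// format strings in the diff; the dynamic shared memory size is hard-coded to 0
// here, where the real generator passes dynamic_shared_offset.
extern "C" void rodinia_bfs(int *data, int num_blocks, int num_threads) {
    cudaError_t err;
    rodinia_bfs_gpu<<<num_blocks, num_threads, 0>>>(data);
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "%s\n", cudaGetErrorString(err));
    }
}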