diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 1ba8302ab84c96d818e8fdbfecea10576ed946a5..c9720273c03243d4874b27fc6c74f20fd21a6c33 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -724,8 +724,13 @@ namespace cg = cooperative_groups;
         write!(w, "\tcudaError_t err;\n")?;
         write!(
             w,
-            "\t{}_gpu<<<{}, {}, {}>>>({});\n",
-            self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args
+            "\t{}_{}_gpu<<<{}, {}, {}>>>({});\n",
+            self.module_name,
+            self.function.name,
+            num_blocks,
+            num_threads,
+            dynamic_shared_offset,
+            pass_args
         )?;
         write!(w, "\terr = cudaGetLastError();\n")?;
         write!(