diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index a3a46d93943d10bbebd125df93f95685c3ad5ac0..85ee7d90f8e69e74a079c32816a5bf27be764f5b 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -631,7 +631,7 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str write!(w, "\tcudaMalloc((void**)&ret, sizeof({}));\n", ret_type)?; write!(pass_args, ", ret")?; } - write!(w, "\t{}<<<{}_gpu, {}, {}>>>({});\n", self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args)?; + write!(w, "\t{}_gpu<<<{}, {}, {}>>>({});\n", self.function.name, num_blocks, num_threads, dynamic_shared_offset, pass_args)?; write!(w, "\tcudaDeviceSynchronize();\n")?; if ret_primitive { write!(w, "\t{} host_ret;\n", ret_type)?;