diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index 0c69544bc02eac9020bd392400564054e6ad55a7..2151dc0dfd53526c8c39fb755912bfcc63cb6a08 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -419,7 +419,9 @@ namespace cg = cooperative_groups;
         }
         // Pull primitive return to a pointer parameter
         if self.types[self.return_type_id.idx()].is_primitive() {
-            write!(w, ", ")?;
+            if !first_param {
+                write!(w, ", ")?;
+            }
             write!(
                 w,
                 "{} __restrict__ ret",
@@ -1907,7 +1909,7 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str
                 _ => "sinh",
             },
             Intrinsic::Sqrt => match ty {
-                Type::Float32 => "__sqrtf",
+                Type::Float32 => "sqrtf",
                 ty if ty.is_signed() || ty.is_unsigned() => "isqrt",
                 _ => "sqrt",
             },