diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs
index e7195223010777a79307d4dd38ef6c7d27272f16..0c69544bc02eac9020bd392400564054e6ad55a7 100644
--- a/hercules_cg/src/gpu.rs
+++ b/hercules_cg/src/gpu.rs
@@ -371,6 +371,7 @@ impl GPUContext<'_> {
 #include <stddef.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
+#include <math_constants.h>
 #include <mma.h>
 #include <cooperative_groups.h>
 #include <cooperative_groups/memcpy_async.h>
@@ -1906,7 +1907,7 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str
                 _ => "sinh",
             },
             Intrinsic::Sqrt => match ty {
-                ty if ty.is_float() => "__sqrtf",
+                Type::Float32 => "__sqrtf",
                 ty if ty.is_signed() || ty.is_unsigned() => "isqrt",
                 _ => "sqrt",
             },