diff --git a/hercules_cg/src/gpu.rs b/hercules_cg/src/gpu.rs index e7195223010777a79307d4dd38ef6c7d27272f16..0c69544bc02eac9020bd392400564054e6ad55a7 100644 --- a/hercules_cg/src/gpu.rs +++ b/hercules_cg/src/gpu.rs @@ -371,6 +371,7 @@ impl GPUContext<'_> { #include <stddef.h> #include <cuda.h> #include <cuda_runtime.h> +#include <math_constants.h> #include <mma.h> #include <cooperative_groups.h> #include <cooperative_groups/memcpy_async.h> @@ -1906,7 +1907,7 @@ extern \"C\" {} {}(", if ret_primitive { ret_type.clone() } else { "void".to_str _ => "sinh", }, Intrinsic::Sqrt => match ty { - ty if ty.is_float() => "__sqrtf", + Type::Float32 => "__sqrtf", ty if ty.is_signed() || ty.is_unsigned() => "isqrt", _ => "sqrt", },