diff --git a/.gitignore b/.gitignore
index a39f1eeab21b98c0c9b1bced43e6b09b0b17ba06..ec301811d1dfaae8eca6bcb34f948809c35c87fe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,5 +34,5 @@ llvm/test/VISC/parboil/benchmarks/*/run
 llvm/test/VISC/parboil/benchmarks/*/build
 llvm/build
 llvm/install
-build/
+build*/
 install/
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_img_runtime_utils.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_img_runtime_utils.h
index 413e2f2bbf6cec280b522ffe2e42af7c5622bcea..d19e26728ade5d43c46e54f396ec0adae7bd6208 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_img_runtime_utils.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approxhpvm_img_runtime_utils.h
@@ -63,8 +63,11 @@ void *handleTensorReduceApproximationTuples(
     }
     case GPUNodeConfiguration::APPROX::FP16: {
       void *t_out;
+      void *half_func = find_half_function(func);
+      if (half_func == nullptr)
+        throw std::runtime_error("Half version of function does not exist");
       RC->resume_profiler();
-      t_out = tensorReduce(input, axis, func, true);
+      t_out = tensorReduce(input, axis, half_func, true);
       RC->pause_profiler();
       std::pair<double, double> pinfo = RC->get_time_energy();
       RC->reset_profiler();
@@ -155,8 +158,11 @@ void *handleTensorMap1ApproximationTuples(
     }
     case GPUNodeConfiguration::APPROX::FP16: {
       void *t_out;
+      void *half_func = find_half_function(func);
+      if (half_func == nullptr)
+        throw std::runtime_error("Half version of function does not exist");
       RC->resume_profiler();
-      t_out = tensorMap1(func, input, true);
+      t_out = tensorMap1(half_func, input, true);
       RC->pause_profiler();
       std::pair<double, double> pinfo = RC->get_time_energy();
       RC->reset_profiler();
@@ -200,8 +206,11 @@ void *handleTensorMap2ApproximationTuples(
     }
     case GPUNodeConfiguration::APPROX::FP16: {
       void *t_out;
+      void *half_func = find_half_function(func);
+      if (half_func == nullptr)
+        throw std::runtime_error("Half version of function does not exist");
       RC->resume_profiler();
-      t_out = tensorMap2(func, input1, input2, true);
+      t_out = tensorMap2(half_func, input1, input2, true);
       RC->pause_profiler();
       std::pair<double, double> pinfo = RC->get_time_energy();
       RC->reset_profiler();
@@ -245,8 +254,11 @@ void *handleTensorMap3ApproximationTuples(
     }
     case GPUNodeConfiguration::APPROX::FP16: {
       void *t_out;
+      void *half_func = find_half_function(func);
+      if (half_func == nullptr)
+        throw std::runtime_error("Half version of function does not exist");
       RC->resume_profiler();
-      t_out = tensorMap3(func, input1, input2, input3, true);
+      t_out = tensorMap3(half_func, input1, input2, input3, true);
       RC->pause_profiler();
       std::pair<double, double> pinfo = RC->get_time_energy();
       RC->reset_profiler();
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
index 353e966951c3373495afb161d8067e4dfbef5fea..9398730d7eef0f4a02c5aabe14716a7195760aa5 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/device_math.h
@@ -1,3 +1,6 @@
+#ifndef DEVICE_MATH_H
+#define DEVICE_MATH_H
+
 #include <device_launch_parameters.h>
 
 namespace device {
@@ -25,13 +28,13 @@ extern void *sqrt_ptrptr;
 extern void *fmax_ptrptr;
 extern void *fmin_ptrptr;
 extern void *favg3_ptrptr;
-
-extern void *hhypot_ptrptr;
-extern void *hadd_ptrptr;
-extern void *hmax_ptrptr;
-extern void *hdiv_ptrptr;
 } // namespace device
 
 #define DEF_FUNC(func)                                                         \
   __device__ void *func##_ptr = (void *)func;                                  \
   void *func##_ptrptr = (void *)&func##_ptr;
+
+
+void *find_half_function(void *known_float_func);
+
+#endif
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
index f20de26c0a7f4647af62fb51e871129d58fc5540..6230df2a915be5ccfbbd525b1fe68d9d133e6a18 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/device_math.cu
@@ -163,6 +163,8 @@ __device__ half hhypot(half x, half y) { return hsqrt(x * x + y * y); }
 
 __device__ half hmax(half x, half y) { return x >= y ? x : y; }
 
+__device__ half havg3(half x, half y) { return __hdiv(x, 3.0f); }
+
 __device__ void *hypot_ptr = (void *)device::hypot;
 __device__ void *atan2_ptr = (void *)device::atan2;
 __device__ void *fadd_ptr = (void *)device::add;
@@ -178,6 +180,7 @@ __device__ void *hhypot_ptr = (void *)hhypot;
 __device__ void *hadd_ptr = (void *)(half(*)(half, half))__hadd;
 __device__ void *hmax_ptr = (void *)(half(*)(half, half))hmax;
 __device__ void *hdiv_ptr = (void *)__hdiv;
+__device__ void *havg3_ptr = (void *)havg3;
 
 namespace device {
 void *fhypot_ptrptr = (void *)&hypot_ptr;
@@ -191,8 +194,34 @@ void *fmax_ptrptr = (void *)&fmax_ptr;
 void *fmin_ptrptr = (void *)&fmin_ptr;
 void *favg3_ptrptr = (void *)&favg3_ptr;
 
+} // namespace device
+
 void *hhypot_ptrptr = (void *)&hhypot_ptr;
 void *hadd_ptrptr = (void *)&hadd_ptr;
 void *hmax_ptrptr = (void *)&hmax_ptr;
 void *hdiv_ptrptr = (void *)&hdiv_ptr;
-} // namespace device
+void *havg3_ptrptr = (void *)&havg3_ptr;
+
+void *find_half_function(void *known_float_func) {
+  if (known_float_func == device::fhypot_ptrptr)
+    return hhypot_ptrptr;
+  if (known_float_func == device::atan2_ptrptr)
+    return nullptr;
+  if (known_float_func == device::fadd_ptrptr)
+    return hadd_ptrptr;
+  if (known_float_func == device::fsub_ptrptr)
+    return nullptr;
+  if (known_float_func == device::fmul_ptrptr)
+    return nullptr;
+  if (known_float_func == device::fdiv_ptrptr)
+    return hdiv_ptrptr;
+  if (known_float_func == device::sqrt_ptrptr)
+    return nullptr;
+  if (known_float_func == device::fmax_ptrptr)
+    return hmax_ptrptr;
+  if (known_float_func == device::fmin_ptrptr)
+    return nullptr;
+  if (known_float_func == device::favg3_ptrptr)
+    return havg3_ptrptr;
+  return nullptr;
+}