Adding Half precision support in PROMISE layer API

e768c3b4 · Hashim Sharif · 68905098 · e768c3b4
Commit e768c3b4 authored 6 years ago by Hashim Sharif
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
@@ -67,6 +67,12 @@ void llvm_hpvm_initTensorRt(int gpuid){
    readOpenTunerFlags("opentuner_flags");
 #endif
+#ifdef PROMISE_TUNER_ENABLED
+    readOpenTunerFlags("opentuner_flags");
+#endif
    //#ifdef PROMISE_MODE
    // readQuantRanges("quant_ranges");
    //#endif
@@ -671,8 +677,8 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){
  Tensor* lhs = (Tensor*) lhs_ptr;
  Tensor* rhs = (Tensor*) rhs_ptr;
-  printTensorDims2(lhs);
+  //printTensorDims2(lhs);
-  printTensorDims2(rhs);
+  //printTensorDims2(rhs);
  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
@@ -726,7 +732,7 @@ void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr){
- #ifdef ERROR_INJECTION_ENABLED
+  #ifdef ERROR_INJECTION_ENABLED
  if(op_counter >= total_ops){
    ERROR("No accuracy flag found \n");
@@ -1133,6 +1139,18 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max,
 			float out_min, float out_max, int swing){ // NOTE: min_val, max_val apply to 'ClippedRelu'
+  #ifdef PROMISE_TUNER_ENABLED
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+  swing = op_accuracies[op_counter];
+  op_counter++;
+  #endif
  if (swing < 0 || swing > 9){
    ERROR("Incorrect swing value");
  }
@@ -1144,14 +1162,30 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max,
    // aRead error
    input = addPromiseError(input, swing);
  }
-  void* conv_out = tensorConvolution(input, filter,
+  void* conv_out;  
+  if(swing == 8){  
+    conv_out = tensorHalfConvolution(input, filter,
 				     conv_pad_h, conv_pad_w,
 				     conv_stride_h, conv_stride_w,
 				     1, 0);
+  }
+  else{
+    conv_out = tensorConvolution(input, filter,
+				 conv_pad_h, conv_pad_w,
+				 conv_stride_h, conv_stride_w,
+				 1, 0);
+  }
  void* conv_add;
  if(bias != NULL){
-    conv_add = tensorAdd(conv_out, bias);
+    if(swing == 8){  
+      conv_add = tensorHalfAdd(conv_out, bias);
+    }
+    else{
+      conv_add = tensorAdd(conv_out, bias);
+    }
  }
  else{
    conv_add = conv_out;
@@ -1201,6 +1235,19 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max,
 		      int activation_id,
 		      float out_min, float out_max, int swing){ // NOTE: min_val, max_val apply to 'ClippedRelu'
+  #ifdef PROMISE_TUNER_ENABLED
+  if(op_counter >= total_ops){
+    ERROR("No accuracy flag found \n");
+  }
+  swing = op_accuracies[op_counter];
+  op_counter++;
+  #endif
  if (swing < 0 || swing > 9){
    ERROR("Incorrect swing value");
  }
@@ -1213,13 +1260,26 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max,
    // NOTE: Modelling aRead error in PROMISE
    input = addPromiseError(input, swing);
  }
-  void* gemm_out = tensorGemmGPU(input, weights);
+  void* gemm_out;
+  if(swing == 8){
+    gemm_out = tensorHalfGemm(input, weights);
+  }
+  else{
+    gemm_out = tensorGemmGPU(input, weights);
+  }
  void* gemmbias_out;
  if(bias != NULL){
-    gemmbias_out = tensorAdd(gemm_out, bias);
+    // Swing 8 selects the half-precision (FP16) routines
+    if(swing == 8){
+      gemmbias_out = tensorHalfAdd(gemm_out, bias);
+    }
+    else{
+      gemmbias_out = tensorAdd(gemm_out, bias);
+    }
  }
  else{
    gemmbias_out = gemm_out;