Implementing Quantization routines for PROMISE-like API

60716a38 · Hashim Sharif · 74d0793f · 60716a38 · 60716a38 · 60716a38
Commit 60716a38 authored 6 years ago by Hashim Sharif
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc
@@ -399,6 +399,28 @@ void testSoftmaxOutput(void* output_ptr){
 }


+// Smoke test for quantizeTensorPromise: fills an 8-element float tensor with
+// hand-picked values, quantizes it against the range [-4, 6] and prints the
+// returned tensor. Nothing is asserted; the output is inspected manually.
+void testQuantization(){
+
+  printf("***** TensorQuantize ***** \n\n");
+
+  // Dimensions 2 x 4 x 1 x 1 -> 8 elements, matching the table below.
+  void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 4, 1, 1);
+  struct Tensor* input_tensor = (struct Tensor*) input;
+  float* host_vals = (float*) input_tensor->host_data;
+
+  // All values but -25 lie inside [-4, 6]; -25 falls below the lower bound
+  // passed to the quantizer.
+  const float test_vals[8] = {-0.1, -25, 0.2, -0.4, 1.7, -2.9, 0.7, 0.99};
+  for(int i = 0; i < 8; i++){
+    host_vals[i] = test_vals[i];
+  }
+
+  void* quantize_result = quantizeTensorPromise(input, -4, 6);
+  printTensorValues(quantize_result);
+}
+
+


 int main(){
@@ -407,10 +429,12 @@ int main(){

  startProfiling();
  
-  testTensorHgemm2();
-  testTensorSgemm2();
-  testTensorConv();
-  testTensorError();
+  //testTensorHgemm2();
+  //testTensorSgemm2();
+  //testTensorConv();
+  //testTensorError();
+
+  testQuantization();

  //testTensorGemm();
  //testTensorGemmGPU();

--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/error.h
@@ -600,6 +600,59 @@ void* addGaussianError(void* x_ptr, int error_scale){



+// In-place quantize-and-clip kernel: one thread per element of A.
+//
+// Each value is clipped to [min, max]; values inside the range are snapped
+// down to the grid  min + k * mul_factor  (k a whole number, truncation
+// toward zero of (A[i] - min) / mul_factor).
+//
+//   A          - device buffer of n floats, modified in place
+//   n          - number of elements in A
+//   mul_factor - quantization step; expected to be > 0
+//   min, max   - clipping bounds
+//
+// Launch with a 1D grid covering at least n threads; surplus threads exit.
+// Out-of-range values are clipped BEFORE any rounding, so huge or infinite
+// inputs never go through a float -> int conversion (undefined behaviour when
+// the value does not fit in an int). NaN elements are left untouched.
+__global__ void quantizeAndClip(float* A, int n, float mul_factor, float min, float max){
+
+  int id = blockIdx.x * blockDim.x + threadIdx.x;
+  if(id < n){
+    float val = A[id];
+
+    if(val < min){
+      A[id] = min;
+    }
+    else if(val > max){
+      A[id] = max;
+    }
+    else if(val >= min && mul_factor > 0.0f){
+      // `val >= min` is false only for NaN here, which is propagated as-is.
+      // A non-positive step cannot define a grid, so the value is kept.
+      float level = truncf((val - min) / mul_factor);
+
+      // Separately rounded multiply and add (no FMA contraction, no double
+      // promotion) so in-range results match the previous
+      // `temp * 1.0 * mul_factor` followed by `+ min` bit for bit.
+      float result = __fadd_rn(__fmul_rn(level, mul_factor), min);
+
+      // Rounding in the reconstruction may overshoot the bounds slightly.
+      if(result > max){
+        result = max;
+      }
+      if(result < min){
+        result = min;
+      }
+      A[id] = result;
+    }
+  }
+}
+
+
+// In-place quantization kernel without clipping: one thread per element of A.
+//
+// Each value is replaced by  min + k * mul_factor  where k is
+// (A[i] - min) / mul_factor truncated toward zero.
+//
+//   A          - device buffer of n floats, modified in place
+//   n          - number of elements in A
+//   mul_factor - quantization step; must be non-zero
+//   min        - origin of the quantization grid
+//
+// Launch with a 1D grid covering at least n threads; surplus threads exit.
+// Truncation is done in floating point (truncf) rather than through an int,
+// because converting a float that does not fit in an int is undefined
+// behaviour and this kernel does not bound its input.
+__global__ void quantizeElem(float* A, int n, float mul_factor, float min){
+
+  int id = blockIdx.x * blockDim.x + threadIdx.x;
+  if(id < n){
+    float level = truncf((A[id] - min) / mul_factor);
+
+    // Separately rounded multiply and add: avoids the double-precision
+    // promotion of `temp * 1.0` while keeping the same rounding steps.
+    A[id] = __fadd_rn(__fmul_rn(level, mul_factor), min);
+  }
+}
+
+
+// Quantizes a float tensor in place to 256 uniform levels over [min, max],
+// clipping values outside that range, by launching quantizeAndClip on the
+// tensor's gpu_data buffer.
+//
+//   input_ptr - Tensor* passed as void*; its gpu_data is overwritten
+//   min, max  - bounds of the quantization range; max must exceed min
+//
+// Returns the same tensor pointer that was passed in.
+//
+// The kernel launch is asynchronous and is not synchronized here; only
+// launch-configuration errors are detected and logged.
+// NOTE(review): hostToDeviceCopy presumably makes gpu_data current before the
+// launch - confirm against its definition.
+void* quantizeTensorPromise(void* input_ptr, float min, float max){
+
+  Tensor* input = (Tensor*) input_ptr;
+
+  int quantize_range = 256;
+  float input_range = max - min;
+  float mul_factor = input_range / quantize_range;
+  INFO("mul_factor = %f \n", mul_factor);
+
+  // Integer ceil-division: going through float loses precision once the
+  // element count exceeds 2^24 and can leave the tail of the tensor uncovered.
+  size_t num_elems = input->num_elems;
+  int blockSize = 1024;
+  int gridSize = (int) ((num_elems + blockSize - 1) / blockSize);
+  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);
+
+  hostToDeviceCopy(input);
+
+  // A zero, negative or NaN step (max <= min, NaN bounds, or underflow of the
+  // division) cannot define a quantization grid; leave the data untouched.
+  if(!(mul_factor > 0.0f)){
+    INFO("quantizeTensorPromise: invalid range [%f, %f], tensor left unquantized \n", min, max);
+    return input;
+  }
+
+  // Launching a grid with zero blocks is an invalid configuration.
+  if(num_elems == 0){
+    return input;
+  }
+
+  quantizeAndClip<<<gridSize, blockSize>>>((float*) input->gpu_data, (int) num_elems, mul_factor, min, max);
+
+  // Kernel launches do not return a status; query it explicitly so a bad
+  // configuration does not go unnoticed.
+  cudaError_t launch_status = cudaGetLastError();
+  if(launch_status != cudaSuccess){
+    INFO("quantizeAndClip launch failed: %s \n", cudaGetErrorString(launch_status));
+  }
+
+  return input;
+}
+
+
 void* tensorAddError(void* x_ptr, int error_scale){

  void * new_x = addGaussianError(x_ptr, error_scale);

--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
@@ -122,8 +122,7 @@ void readOpenTunerFlags(char* file_name);
 void clearOpCounter();
 void clearTensorMap();
 void freeOutputTensors();
-
-
+void* quantizeTensorPromise(void* input_ptr, float min, float max);

 /*
 void dummyFunction(){