Skip to content
Snippets Groups Projects
Commit 60716a38 authored by Hashim Sharif
Browse files

Implementing Quantization routines for PROMISE-like API

parent 74d0793f
No related branches found
No related tags found
No related merge requests found
......@@ -399,6 +399,28 @@ void testSoftmaxOutput(void* output_ptr){
}
/* Smoke test for quantizeTensorPromise.
 *
 * Creates a float tensor with dimension arguments (2, 4, 1, 1), writes eight
 * sample values into its host buffer, quantizes it with bounds -4 and 6, and
 * prints the tensor that comes back.
 */
void testQuantization(){

  printf("***** TensorQuantize ***** \n\n");

  // Sample inputs; the -25 entry lies below the lower bound passed to the quantizer.
  const float sample_values[] = {-0.1, -25, 0.2, -0.4, 1.7, -2.9, 0.7, 0.99};
  const int sample_count = sizeof(sample_values) / sizeof(sample_values[0]);

  void* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 4, 1, 1);
  struct Tensor* input_tensor = (struct Tensor*) input;
  float* host_ptr = (float*) input_tensor->host_data;

  for(int i = 0; i < sample_count; i++){
    host_ptr[i] = sample_values[i];
  }

  void* quantize_result = quantizeTensorPromise(input, -4, 6);
  printTensorValues(quantize_result);
}
int main(){
......@@ -407,10 +429,12 @@ int main(){
startProfiling();
testTensorHgemm2();
testTensorSgemm2();
testTensorConv();
testTensorError();
//testTensorHgemm2();
//testTensorSgemm2();
//testTensorConv();
//testTensorError();
testQuantization();
//testTensorGemm();
//testTensorGemmGPU();
......
......@@ -600,6 +600,59 @@ void* addGaussianError(void* x_ptr, int error_scale){
/* Quantizes every element of A in place onto a grid of step mul_factor
 * anchored at min, then clamps the quantized value to [min, max].
 *
 * A          - array of n floats, read and overwritten by the kernel
 * n          - number of elements in A
 * mul_factor - width of one quantization step
 * min        - grid origin and lower clamp bound
 * max        - upper clamp bound
 *
 * Launch layout: 1D grid of 1D blocks with at least n threads in total;
 * threads whose global index is >= n do nothing.
 */
__global__ void quantizeAndClip(float* A, int n, float mul_factor, float min, float max){

  int id = blockIdx.x * blockDim.x + threadIdx.x;
  if(id >= n){
    return;
  }

  // Whole number of steps between min and the element; the float-to-int
  // conversion discards the fractional part.
  int steps = (A[id] - min) / mul_factor;

  // Keep the arithmetic in single precision: multiplying by a bare 1.0
  // literal would promote the whole expression to double.
  float quantized = steps * mul_factor + min;

  // Clamp in a register (upper bound first, then lower bound) and write the
  // element back exactly once.
  quantized = fmaxf(fminf(quantized, max), min);
  A[id] = quantized;
}
/* Quantizes every element of A in place onto a grid of step mul_factor
 * anchored at min. No clamping is applied.
 *
 * A          - array of n floats, read and overwritten by the kernel
 * n          - number of elements in A
 * mul_factor - width of one quantization step
 * min        - grid origin
 *
 * Launch layout: 1D grid of 1D blocks with at least n threads in total;
 * threads whose global index is >= n do nothing.
 */
__global__ void quantizeElem(float* A, int n, float mul_factor, float min){

  int id = blockIdx.x * blockDim.x + threadIdx.x;
  if(id >= n){
    return;
  }

  // Whole number of steps between min and the element; the float-to-int
  // conversion discards the fractional part.
  int steps = (A[id] - min) / mul_factor;

  // Keep the arithmetic in single precision: multiplying by a bare 1.0
  // literal would promote the whole expression to double.
  A[id] = steps * mul_factor + min;
}
/* Quantizes a tensor's float data in place on the GPU.
 *
 * The range [min, max] is divided into 256 steps; the tensor is copied to the
 * device with hostToDeviceCopy and quantizeAndClip is launched over
 * input->gpu_data, which snaps each element to a step boundary and clamps it
 * to [min, max].
 *
 * input_ptr - pointer to the Tensor to quantize
 * min, max  - bounds of the quantization range
 *
 * Returns input_ptr (the same tensor, modified on the device). This function
 * does not synchronize with the kernel and does not copy the result back to
 * the host buffer. A failed launch is reported through INFO and the tensor is
 * still returned.
 */
void* quantizeTensorPromise(void* input_ptr, float min, float max){

  Tensor* input = (Tensor*) input_ptr;

  const int quantize_range = 256;
  float input_range = max - min;
  float mul_factor = input_range / quantize_range;
  INFO("mul_factor = %f \n", mul_factor);

  const int blockSize = 1024;
  // Integer ceiling division: rounding the element count through float would
  // lose precision above 2^24 elements and could leave the tail unprocessed.
  size_t num_elems = input->num_elems;
  int gridSize = (int) ((num_elems + blockSize - 1) / blockSize);
  INFO("blockSize = %d, gridSize = %d \n", blockSize, gridSize);

  hostToDeviceCopy(input);

  // An empty tensor would request a zero-block grid, which is an invalid
  // launch configuration; there is nothing to quantize in that case.
  if(gridSize == 0){
    return input;
  }

  quantizeAndClip<<<gridSize, blockSize>>>((float*) input->gpu_data, input->num_elems, mul_factor, min, max);

  // Kernel launches return no status; configuration errors only show up here.
  cudaError_t launch_status = cudaGetLastError();
  if(launch_status != cudaSuccess){
    INFO("quantizeAndClip launch failed: %s \n", cudaGetErrorString(launch_status));
  }

  return input;
}
void* tensorAddError(void* x_ptr, int error_scale){
void * new_x = addGaussianError(x_ptr, error_scale);
......
......@@ -122,8 +122,7 @@ void readOpenTunerFlags(char* file_name);
void clearOpCounter();
void clearTensorMap();
void freeOutputTensors();
// Quantizes the tensor behind input_ptr in place on the GPU: the range
// [min, max] is split into 256 steps and each element is snapped to a step
// and clamped to [min, max]. Returns the same pointer that was passed in.
void* quantizeTensorPromise(void* input_ptr, float min, float max);
/*
void dummyFunction(){
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment