From 2d1d9282c7090a3b207664f65d6a848e147357c2 Mon Sep 17 00:00:00 2001 From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu> Date: Thu, 5 Dec 2019 01:15:11 -0600 Subject: [PATCH] Fixing rescaling related bug --- .../dnn_sources/src/test_ops.cc | 24 ++++- .../include/approx_simulation.h | 90 +++++++++++++------ .../include/approx_techniques2.h | 4 +- 3 files changed, 85 insertions(+), 33 deletions(-) diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc index dfa4111260..43151dd965 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/test_ops.cc @@ -477,8 +477,12 @@ void testQuantization(){ void testSampleFilter(){ printf("***** Tensor Sample Filter ***** \n\n"); - Tensor* input = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 4, 2, 2, 2); - fillTensorWithVal(input, 3); + Tensor* input = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3); + //fillTensorWithVal(input, 3); + fillWithOnesAndTwos(input); + + Tensor* input2 = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, 2, 32, 32); + fillTensorWithVal(input2, 1); /* float* host_ptr = (float*) ((struct Tensor*) input)->host_data; host_ptr[0] = -0.1; @@ -493,15 +497,27 @@ void testSampleFilter(){ printTensorValues(input); - printf("\n\n"); + /* printf("\n\n"); hpvm_request_tensor(input, DEVICE); sampleFilter(input, 2, 1); hpvm_request_tensor(input, HOST); - + printTensorValues(input); + */ + + void* exact_res = tensorConvolution(input2, input, 0, 0, + 1, 1, 1, 1); + printTensorValues(exact_res); + + void* res = tensorConvSampSim(input2, input, 0, 0, 1, 1, 1, 1, 4, 0); + + //void* res = tensorConvApprox(input2, input, 0, 0, 1, 1, 1, 1, 1, 1, 4, 3); + + printTensorValues(res); + } diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h index 5077c77ffb..384464f44d 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_simulation.h @@ -261,7 +261,8 @@ void sampleFilter(Tensor* filter, int skip_rate, int skip_offset){ int numBlocks = (n * c * h * w + 127) / 128; int N = n * c * h * w; - float mul_factor = skip_rate / (skip_rate - 1); + //float mul_factor = skip_rate / (skip_rate - 1); + float mul_factor = (skip_rate * 1.0) / (skip_rate - 1); printf ("mul_factor = %f \n", mul_factor); @@ -804,6 +805,10 @@ SampParams getSampParams(int swing){ /***** API for Autotuner Use - Not the ApproxHPVM Wrapper API */ + +// NOTE: code to compute the gold result - for norm computations +//bool compute_norms = false; // true; //false; + void* ConvLayer_PROMISE(void* input, float i_min, float i_max, void* filter, float w_min, float w_max, void* bias, float b_min, float b_max, @@ -834,17 +839,6 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max, } - - // NOTE: code to compute the gold result - for norm computations - bool compute_norms = false; - void* gold; - if (compute_norms){ - - gold = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - } @@ -855,10 +849,10 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max, DEBUG("params.row = %d, params.col = %d, params.skip_offset = %d \n", params.row, params.col, params.skip_offset); - conv_out = tensorConvPerfCuda(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, - params.row, params.col, params.skip_offset); + conv_out = tensorConvPerfCudaHalf(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, 1, 1, + params.row, params.col, params.skip_offset); } @@ -867,12 +861,30 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max, SampParams params = getSampParams(swing); DEBUG("params.skip_rate = %d, params.skip_offset = %d \n", params.skip_rate, params.skip_offset); + + /* + conv_out = tensorConvolutionKernelSamp(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 1, + 2); + */ + conv_out = tensorConvSampSim(input, filter, conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, 1, 1, params.skip_rate, params.skip_offset); + + /* conv_out = tensorConvApproxHalf(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, 1, 1, + 1,1, + 4, 3); + // params.skip_rate, params.skip_offset); + */ + } @@ -891,17 +903,7 @@ void* ConvLayer_PROMISE(void* input, float i_min, float i_max, 1, 0); } - - - - if (compute_norms){ - - Norm_t* norms = calculateNormsTreeReduction((struct Tensor*) conv_out, - (struct Tensor*) gold); - add_norms(norms, "tensorConv", swing); - add_conv_overheads(input, filter, conv_stride_h, conv_stride_w, swing); - } - + @@ -1017,6 +1019,7 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max, break; } + return activation_out; @@ -1361,3 +1364,36 @@ void* FCLayer_PROMISE(void* input, float i_min, float i_max, */ + +/* void* gold; + if (compute_norms){ + + gold = tensorConvolution(input, filter, + conv_pad_h, conv_pad_w, + conv_stride_h, conv_stride_w, + 1, 0); + } + + + + + if (compute_norms){ + + Norm_t* norms = calculateNormsTreeReduction((struct Tensor*) conv_out, + (struct Tensor*) gold); + add_norms(norms, "tensorConv", swing); + add_conv_overheads(input, filter, conv_stride_h, conv_stride_w, swing); + } + + + + if (compute_norms){ + Norm_t* norms = calculateNormsTreeReduction((struct Tensor*) activation_out, + (struct Tensor*) activation_out); + add_norms(norms, "tensorMul", swing); + add_gemm_overheads(input, weights, swing); + } + + + +*/ diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h index e2905db99a..6042cc7dae 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/approx_techniques2.h @@ -831,7 +831,7 @@ __global__ void createReducedFiltersHalf(__half * output, if(offset % skip_every != skip_every-1-skip_offset) { //are we including this filter element? const int output_row = offset - ((offset + skip_every)/skip_every); //correct for skip_every = 2 output[fIdx*reduced_filter_elem + output_row] = - __hmul((skip_every / (skip_every - 1)), input[tx]); + __hmul((skip_every * 1.0 / (skip_every - 1)), input[tx]); } } } @@ -1147,7 +1147,7 @@ __global__ void createReducedFiltersFull(float * output, if(offset % skip_every != skip_every-1-skip_offset) { //are we including this filter element? const int output_row = offset - ((offset + skip_every)/skip_every); //correct for skip_every = 2 output[fIdx*reduced_filter_elem + output_row] = - (skip_every / (skip_every - 1)) * input[tx]; + (skip_every * 1.0 / (skip_every - 1)) * input[tx]; } } } -- GitLab