diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu index a472fcaa36484950de98f858a74f185900ab80b7..a9613b347ecbedba1e387a73f47756d478f00aed 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/approx_simulation.cu @@ -532,643 +532,6 @@ void *tensorConvSampSim2(void *input_ptr, void *filter_ptr, int vertical_pad, return output; } -/************ NOTE: API for ApproxHPVM Wrapper runtime *******/ -void *PROMISE_Conv(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, int pool_stride, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - Tensor *input_t = (Tensor *)input; - Tensor *filter_t = (Tensor *)filter; - Tensor *bias_t = (Tensor *)bias; - - int orig_type = input_t->cur_type; - - DEBUG("FP32 conversions \n"); - - convertToFP32(input_t); - - convertToFP32(filter_t); - convertToFP32(bias_t); - - DEBUG("DONE FP32 conversions \n"); - - if (swing < 8) { - input = quantizeTensorPromise(input, i_min, i_max); - filter = quantizeTensorPromise(filter, w_min, w_max); - if (bias != NULL) - bias = quantizeTensorPromise(bias, b_min, b_max); - // aRead error - - input = addPromiseError(input, swing); - } - - void *conv_out; - conv_out = tensorConvolution(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 0); - - void *conv_add; - if (bias != NULL) { - conv_add = tensorAdd(conv_out, bias); - } else { - conv_add = conv_out; - } - - void *pool_out; - // NOTE: Skip pooling on negative pool sizes - if (pool_size > 0) { - // FIXME: Currently only using MaxPooling - //-- pool_out = tensorPooling(conv_add, 0, pool_size, pool_size, 0, 0, - // pool_size, pool_size); - pool_out = tensorPooling(conv_add, 0, pool_size, pool_size, 0, 0, - pool_stride, pool_stride); - } else { - pool_out = conv_add; - } - - void *activation_out; - switch (activation_id) { - case -1: - activation_out = pool_out; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(pool_out); - break; - case 1: - activation_out = tensorRelu(pool_out); - break; - case 2: - activation_out = tensorRelu2(pool_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - if (swing < 8 && activation_id != -1) { - activation_out = quantizeTensorPromise(activation_out, out_min, out_max); - } - - // NOTE: Convert back to FP16 if original type - if (orig_type == half_type) { - convertToFP16((Tensor *)activation_out); - } - - return activation_out; -} - -void *PROMISE_FC(void *input, float i_min, float i_max, void *weights, - float w_min, float w_max, void *bias, float b_min, float b_max, - int activation_id, float out_min, float out_max, int swing) { - - Tensor *input_t = (Tensor *)input; - Tensor *weights_t = (Tensor *)weights; - Tensor *bias_t = (Tensor *)bias; - - int orig_type = input_t->cur_type; - - convertToFP32(input_t); - convertToFP32(weights_t); - convertToFP32(bias_t); - - if (swing < 8) { - input = quantizeTensorPromise(input, i_min, i_max); - weights = quantizeTensorPromise(weights, w_min, w_max); - if (bias != NULL) - bias = quantizeTensorPromise(bias, b_min, b_max); - - // NOTE: Modelling aRead error in PROMISE - input = addPromiseError(input, swing); - } - - void *gemm_out; - gemm_out = tensorGemmGPU(input, weights); - - void *gemmbias_out; - if (bias != NULL) { - gemmbias_out = tensorAdd(gemm_out, bias); - } else { - gemmbias_out = gemm_out; - } - - void *activation_out; - switch (activation_id) { - - case -1: - activation_out = gemmbias_out; - INFO("No Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(gemmbias_out); - break; - case 1: - activation_out = tensorRelu(gemmbias_out); - break; - case 2: - activation_out = tensorRelu2(gemmbias_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - if (swing < 8 && activation_id != -1) { - activation_out = quantizeTensorPromise(activation_out, out_min, out_max); - } - - // NOTE: Convert back to FP16 if original type - if (orig_type == half_type) { - convertToFP16((Tensor *)activation_out); - } - - return activation_out; -} - -// NOTE: Enabling the macro below is used for testing against the old PROMISE -// wrapper -//#define OLD_MODEL - -#ifndef OLD_MODEL - -bool isPromiseLayer(int swing) { - - if (swing < 8) - return true; - else - return false; -} - -bool isGPULayer(int swing) { - - if (swing > 10) // PROMISE layers are 1-7 - return true; - else - return false; -} - -bool isFullPrecision(int swing) { - - if (swing == 11) - return true; - else - return false; -} - -bool isHalfPrecision(int swing) { - - if (swing == 12) - return true; - else - return false; -} - -bool isPerforation(int swing) { - - if (swing >= 100 && swing <= 200) - return true; - else - return false; -} - -bool isSampling(int swing) { - - if (swing >= 200 && swing <= 300) - return true; - else - return false; -} - -bool isReductionSampling(int swing) { - - if (swing >= 41 && swing <= 49) - return true; - else - return false; -} - -int getSwing(int swing) { - -#ifdef PROMISE_TUNER_ENABLED - - // NOTE: Skip reading file-based error levels for ApproxHPVM wrapper runtime - if (!approxhpvm_runtime_mode) { - - if (op_counter >= total_ops) { - ERROR("No accuracy flag found \n"); - } - - swing = op_accuracies[op_counter]; - op_counter++; - } - -#endif - - DEBUG("---- swing_value = %d \n", swing); - - return swing; -} - -// bool FP16_tuning = false; - -/***** API for Autotuner Use - Not the ApproxHPVM Wrapper API */ - -void initializeAutotuner() { - - DEBUG("initializing tuner .... \n"); - - sampParamSet = new SampParamSet; - perfParamSet = new PerfParamSet; -} - -void *Autotuner_SampConv(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - SampParams params = sampParamSet->getSampParams(swing); - - DEBUG("params.skip_rate = %d, params.skip_offset = %d \n", params.skip_rate, - params.skip_offset); - - void *conv_out; - - if (!FP16_tuning) { - - /* conv_out = tensorConvSampSim(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, - params.skip_rate, params.skip_offset); - */ - - if (SIMULATION_MODE) { - conv_out = tensorConvSampSim2( - input, filter, conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, - 1, 1, params.skip_rate, params.skip_offset, params.interpolation_id); - } - - else { - conv_out = tensorConvApprox(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, 1, 1, - params.skip_rate, params.skip_offset); - } - - } else { - - conv_out = tensorConvApproxHalf2(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, 1, 1, - params.skip_rate, params.skip_offset); - } - - return conv_out; -} - -void *Autotuner_PerforatedConv(void *input, float i_min, float i_max, - void *filter, float w_min, float w_max, - void *bias, float b_min, float b_max, - int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, - int pool_id, int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - PerfParams params = perfParamSet->getPerfParams(swing); - - DEBUG("params.row = %d, params.col = %d, params.skip_offset = %d \n", - params.row, params.col, params.skip_offset); - - void *conv_out; - - if (!FP16_tuning) { - - if (SIMULATION_MODE) { - - conv_out = tensorConvPerfCuda(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, - params.row, params.col, params.skip_offset); - - } else { - - conv_out = tensorConvApprox( - input, filter, conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, - 1, 1, params.row, params.col, 1, params.skip_offset); - } - - } else { - - conv_out = tensorConvApproxHalf2( - input, filter, conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, 1, - 1, params.row, params.col, 1, params.skip_offset); - } - - return conv_out; -} - -void *Autotuner_ConvOp(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - void *conv_out; - if (isPerforation(swing)) { - - conv_out = Autotuner_PerforatedConv( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, - conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, pool_id, - pool_size, activation_id, out_min, out_max, swing); - - } - - else if (isSampling(swing)) { - - conv_out = Autotuner_SampConv( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, - conv_pad_h, conv_pad_w, conv_stride_h, conv_stride_w, pool_id, - pool_size, activation_id, out_min, out_max, swing); - } - - else if (isHalfPrecision(swing)) { - - if (FP16_tuning) { - - conv_out = tensorHalfConvolution(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 0); - } else { - conv_out = tensorConvolution(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 0); - } - - } - - else if (isFullPrecision(swing)) { - conv_out = tensorConvolution(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 0); - } - - return conv_out; -} - -void *Autotuner_Add(void *input, void *bias, int swing) { - - void *conv_add; - if (bias != NULL) { - - if (isFullPrecision(swing) || !(FP16_tuning)) { - conv_add = tensorAdd(input, bias); - } else { - conv_add = tensorHalfAdd(input, bias); - } - } else { - conv_add = input; - } - - return conv_add; -} - -void *Autotuner_Pooling(void *input, int pool_size, int pool_stride, - int swing) { - - void *pool_out; - - if (pool_size > 0) { - - // FIXME: Currently only using MaxPooling - if (isFullPrecision(swing) || !(FP16_tuning)) { - pool_out = tensorPooling(input, 0, pool_size, pool_size, 0, 0, - pool_stride, pool_stride); - - } - - else { - pool_out = tensorHalfPooling(input, 0, pool_size, pool_size, 0, 0, - pool_stride, pool_stride); - } - - } else { - pool_out = input; - } - - return pool_out; -} - -void *Autotuner_Activation(void *input, int activation_id, int out_min, - int out_max, int swing) { - - void *activation_out; - - if (isFullPrecision(swing) || (!FP16_tuning)) { - - switch (activation_id) { - case -1: - activation_out = input; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(input); - break; - case 1: - activation_out = tensorRelu(input); - break; - case 2: - activation_out = tensorRelu2(input, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - } - - else { - - switch (activation_id) { - case -1: - activation_out = input; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorHalfTanh(input); - break; - case 1: - activation_out = tensorHalfRelu(input); - break; - case 2: - activation_out = tensorHalfRelu2(input, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - } - - return activation_out; -} - -void *Autotuner_GPU_ConvLayer(void *input, float i_min, float i_max, - void *filter, float w_min, float w_max, - void *bias, float b_min, float b_max, - int conv_pad_h, int conv_pad_w, int conv_stride_h, - int conv_stride_w, int pool_id, int pool_size, - int pool_stride, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - void *conv_out = Autotuner_ConvOp( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, conv_pad_h, - conv_pad_w, conv_stride_h, conv_stride_w, pool_id, pool_size, - activation_id, out_min, out_max, swing); - - void *conv_add = Autotuner_Add(conv_out, bias, swing); - - void *pool_out = Autotuner_Pooling(conv_add, pool_size, pool_stride, swing); - - void *activation_out = - Autotuner_Activation(pool_out, activation_id, out_min, out_max, swing); - - return activation_out; -} - -/**** Top-level API for Handling Convolution Layers - - The granularity of handling is at a layer-level - not tensor-op level - -***/ - -void *Autotuner_ConvLayer(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, int pool_stride, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - if (FP16_tuning) { - if (ONLINE_PROFILING) { - ERROR("Online Profiling cannot be enabled with PROMISE Simulation \n"); - } - } - - swing = getSwing(swing); - - if (isPromiseLayer(swing)) { - - return PROMISE_Conv(input, i_min, i_max, filter, w_min, w_max, bias, b_min, - b_max, conv_pad_h, conv_pad_w, conv_stride_h, - conv_stride_w, pool_id, pool_size, pool_stride, - activation_id, out_min, out_max, swing); - } - - assert(isGPULayer(swing)); - - return Autotuner_GPU_ConvLayer( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, conv_pad_h, - conv_pad_w, conv_stride_h, conv_stride_w, pool_id, pool_size, pool_stride, - activation_id, out_min, out_max, swing); -} - -/**** Top-level API Unchanged for backwards compatibility ***/ - -void *ConvLayer_PROMISE(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - return Autotuner_ConvLayer( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, conv_pad_h, - conv_pad_w, conv_stride_h, conv_stride_w, pool_id, pool_size, - pool_size, // FIXIT: Assumption pool_size == pool_strides - activation_id, out_min, out_max, swing); -} - -void *ConvLayer_PROMISE2(void *input, float i_min, float i_max, void *filter, - float w_min, float w_max, void *bias, float b_min, - float b_max, int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, int pool_id, - int pool_size, int pool_stride, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing) { - - return Autotuner_ConvLayer( - input, i_min, i_max, filter, w_min, w_max, bias, b_min, b_max, conv_pad_h, - conv_pad_w, conv_stride_h, conv_stride_w, pool_id, pool_size, pool_stride, - activation_id, out_min, out_max, swing); -} - -void * -FCLayer_PROMISE(void *input, float i_min, float i_max, void *weights, - float w_min, float w_max, void *bias, float b_min, float b_max, - int activation_id, float out_min, float out_max, - int swing) { // NOTE: min_val, max_val apply to 'ClippedRelu' - - swing = getSwing(swing); - - if (isPromiseLayer(swing)) { - - return PROMISE_FC(input, i_min, i_max, weights, w_min, w_max, bias, b_min, - b_max, activation_id, out_min, out_max, swing); - } - - assert(isGPULayer(swing)); - - void *gemm_out; - if (!isFullPrecision(swing)) { - gemm_out = tensorHalfGemm(input, weights); - } else { - gemm_out = tensorGemmGPU(input, weights); - } - - void *gemmbias_out; - if (bias != NULL) { - // Swing 8 corresponds to FP32 - if (isFullPrecision(swing) || (!FP16_tuning)) { - gemmbias_out = tensorAdd(gemm_out, bias); - } else { - gemmbias_out = tensorHalfAdd(gemm_out, bias); - } - } else { - gemmbias_out = gemm_out; - } - - void *activation_out; - switch (activation_id) { - - case -1: - activation_out = gemmbias_out; - INFO("No Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(gemmbias_out); - break; - case 1: - activation_out = tensorRelu(gemmbias_out); - break; - case 2: - activation_out = tensorRelu2(gemmbias_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - return activation_out; -} - -#endif - -#ifdef OLD_MODEL - -#endif #endif diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc index b322ee2be37b60487e15c9109d4230adf1ad84e2..641b342650dce013f0dabd2f1cb4d05c89326798 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/init_api.cc @@ -40,16 +40,6 @@ void llvm_hpvm_initTensorRt(int gpuid) { DEBUG("CREATED HANDLES %d \n", gpuid); -#ifdef PROMISE_TUNER_ENABLED - // readOpenTunerFlags("opentuner_flags"); - - readOpenTunerFlags("promise_flags"); - initializeAutotuner(); - - DEBUG("Read PROMISE FLAGS %d \n", gpuid); - -#endif - runtime_initialized = true; }