diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h index 6e2269a72f3405170c1ab00b42af8d0701e752f5..8deddc88264bc4327bec8dad1709eac0d1a40322 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h @@ -120,13 +120,7 @@ extern "C"{ void* tensorHalfBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr, void* mean_ptr, void* variance_ptr, double epsilon); - void* tensorConvBiasReLU( - void* input_ptr, void* filter_ptr, void *bias_ptr, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int conv_groups - ); - + /* Error injection API - used for accuracy tuning */ void* tensorAddError(void* x_ptr, int error_scale); diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu index c342a88a54d9785344449ac9e496ecd96cc2effc..a80b2f726f7d9d8fd120cfc7390daf9f00b819c6 100644 --- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu +++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu @@ -21,8 +21,6 @@ #include <sstream> #include <string> #include <vector> -#include <cassert> -#include <cstring> #include <cuda_runtime.h> #include <device_launch_parameters.h> @@ -1284,102 +1282,7 @@ void* FCLayer_GPU(void* input, return activation_out; } -void* tensorConvBiasReLU( - void* input_ptr, void* filter_ptr, void *bias_ptr, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int conv_groups -) { - profileEvent("ConvBiasReLU"); - - // Check, move, convert inputs - auto *input = (Tensor *) input_ptr; - auto *filter = (Tensor *) filter_ptr; - auto *bias = (Tensor *) bias_ptr; - if ( - input->tensor_desc == nullptr || filter->filter_desc == nullptr || - bias->tensor_desc == nullptr - ) - ERROR("Tensor descriptor is NULL"); - hostToDeviceCopy(input); - hostToDeviceCopy(filter); - hostToDeviceCopy(bias); - convertToFP32(input); - convertToFP32(filter); - convertToFP32(bias); - // Create convolution descriptor - cudnnConvolutionDescriptor_t convDesc; - checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); - checkCUDNN(cudnnSetConvolution2dDescriptor( - convDesc, - vertical_pad, horizontal_pad, // conv padding - vertical_stride, horizontal_stride, // conv strides - 1, 1, // dilation = 1 - CUDNN_CROSS_CORRELATION, // Use correlation mode - CUDNN_DATA_FLOAT // use float precision - )); - // NOTE: Adding support for grouped convolution - checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups)); - // Use convolution descriptor to get output size, and create output tensor. - int n, c, h, w; - checkCUDNN(cudnnGetConvolution2dForwardOutputDim( - convDesc, input->tensor_desc, filter->filter_desc, - &n, &c, &h, &w - )); - assert(input->data_format == CUDNN_TENSOR_NCHW); - auto* output = (Tensor*) create4DTensor( - (cudnnDataType_t) float_type, CUDNN_TENSOR_NCHW, n, c, h, w - ); - - // Hardcode the same algorithm as the baseline - cudnnConvolutionFwdAlgo_t convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - - // Create workspace - size_t workspace_size; - checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize( - cudnnHandle, - input->tensor_desc, - filter->filter_desc, - convDesc, - output->tensor_desc, - convAlgo, - &workspace_size - )); - void* workspace; - checkCUDA(cudaMalloc(&workspace, workspace_size)); - // Create a zero tensor for z with the same shape as output - auto* z = (Tensor*) create4DTensor( - (cudnnDataType_t) float_type, CUDNN_TENSOR_NCHW, n, c, h, w - ); - std::memset(z->host_data, 0, z->num_elems * sizeof(float)); - hostToDeviceCopy(z); - // Create ReLU descriptor - cudnnActivationDescriptor_t reluDesc; - checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc)); - checkCUDNN(cudnnSetActivationDescriptor( - reluDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0 - )); - - float alpha1 = 1.0, alpha2 = 0.0; - checkCUDNN(cudnnConvolutionBiasActivationForward( - cudnnHandle, - &alpha1, - input->tensor_desc, input->gpu_data, - filter->filter_desc, filter->gpu_data, - convDesc, convAlgo, - workspace, workspace_size, - &alpha2, - z->tensor_desc, z->gpu_data, - bias->tensor_desc, bias->gpu_data, - reluDesc, - output->tensor_desc, output->gpu_data - )); - checkCUDA(cudaDeviceSynchronize()); - - profileEvent("ConvBiasReLU_end", true); - return output; -}