diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
index 6e2269a72f3405170c1ab00b42af8d0701e752f5..8deddc88264bc4327bec8dad1709eac0d1a40322 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_runtime.h
@@ -120,13 +120,7 @@ extern "C"{
   void* tensorHalfBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr,
 			    void* mean_ptr, void* variance_ptr, double epsilon);
 
-  void* tensorConvBiasReLU(
-      void* input_ptr, void* filter_ptr, void *bias_ptr,
-		  int vertical_pad, int horizontal_pad,
-		  int vertical_stride, int horizontal_stride,
-		  int conv_mode, int conv_groups
-  );
-
+
   /* Error injection API - used for accuracy tuning */
   void* tensorAddError(void* x_ptr, int error_scale);
   
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
index c342a88a54d9785344449ac9e496ecd96cc2effc..a80b2f726f7d9d8fd120cfc7390daf9f00b819c6 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu
@@ -21,8 +21,6 @@
 #include <sstream>
 #include <string>
 #include <vector>
-#include <cassert>
-#include <cstring>
 
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
@@ -1284,102 +1282,7 @@ void* FCLayer_GPU(void* input,
   return activation_out;
 }
 
-void* tensorConvBiasReLU(
-    void* input_ptr, void* filter_ptr, void *bias_ptr,
-		int vertical_pad, int horizontal_pad,
-		int vertical_stride, int horizontal_stride,
-		int conv_mode, int conv_groups
-) {
-  profileEvent("ConvBiasReLU");
-
-  // Check, move, convert inputs
-  auto *input = (Tensor *) input_ptr;
-  auto *filter = (Tensor *) filter_ptr;
-  auto *bias = (Tensor *) bias_ptr;
-  if (
-    input->tensor_desc == nullptr || filter->filter_desc == nullptr ||
-    bias->tensor_desc == nullptr
-  )
-    ERROR("Tensor descriptor is NULL");
-  hostToDeviceCopy(input);
-  hostToDeviceCopy(filter);
-  hostToDeviceCopy(bias);
-  convertToFP32(input);
-  convertToFP32(filter);
-  convertToFP32(bias);
 
-  // Create convolution descriptor
-  cudnnConvolutionDescriptor_t convDesc;
-  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
-  checkCUDNN(cudnnSetConvolution2dDescriptor(
-    convDesc,
-		vertical_pad, horizontal_pad, // conv padding
-		vertical_stride, horizontal_stride, // conv strides
-		1, 1, // dilation = 1
-		CUDNN_CROSS_CORRELATION, // Use correlation mode
-    CUDNN_DATA_FLOAT // use float precision
-  ));
-  // NOTE: Adding support for grouped convolution
-  checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups));
 
-  // Use convolution descriptor to get output size, and create output tensor.
-  int n, c, h, w;
-  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(
-    convDesc, input->tensor_desc, filter->filter_desc,
-		&n, &c, &h, &w
-  ));
-  assert(input->data_format == CUDNN_TENSOR_NCHW);
-  auto* output = (Tensor*) create4DTensor(
-    (cudnnDataType_t) float_type, CUDNN_TENSOR_NCHW, n, c, h, w
-  );
-
-  // Hardcode the same algorithm as the baseline
-  cudnnConvolutionFwdAlgo_t convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
-
-  // Create workspace
-  size_t workspace_size;
-  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(
-    cudnnHandle,
-		input->tensor_desc,
-		filter->filter_desc,
-		convDesc,
-		output->tensor_desc,
-		convAlgo,
-		&workspace_size
-  ));
-  void* workspace;
-  checkCUDA(cudaMalloc(&workspace, workspace_size)); 
 
-  // Create a zero tensor for z with the same shape as output
-  auto* z = (Tensor*) create4DTensor(
-    (cudnnDataType_t) float_type, CUDNN_TENSOR_NCHW, n, c, h, w
-  );
-  std::memset(z->host_data, 0, z->num_elems * sizeof(float));
-  hostToDeviceCopy(z);
 
-  // Create ReLU descriptor
-  cudnnActivationDescriptor_t reluDesc;
-  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
-  checkCUDNN(cudnnSetActivationDescriptor(
-    reluDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0
-  ));
-
-  float alpha1 = 1.0, alpha2 = 0.0;
-  checkCUDNN(cudnnConvolutionBiasActivationForward(
-    cudnnHandle,
-    &alpha1,
-    input->tensor_desc, input->gpu_data,
-    filter->filter_desc, filter->gpu_data,
-    convDesc, convAlgo,
-    workspace, workspace_size,
-    &alpha2,
-    z->tensor_desc, z->gpu_data,
-    bias->tensor_desc, bias->gpu_data,
-    reluDesc,
-    output->tensor_desc, output->gpu_data
-  ));
-  checkCUDA(cudaDeviceSynchronize());
-
-  profileEvent("ConvBiasReLU_end", true);
-  return output;
-}