diff --git a/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
index ffb4c3a809b3e936f6c27ebd7c11aef5c4460104..e60e455e5a3020ef00c7dd9a03c14744c69bcdea 100644
--- a/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
+++ b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
@@ -11,6 +11,7 @@
 
 using namespace std;
 
+
 class UnitTestResults {
 
 private:
@@ -38,8 +39,6 @@ public:
 
     float *data_ptr = (float *)res->host_data;
     for (unsigned int i = 0; i < res->num_elems; i++) {
-      // printf("**diff value = %f ", std::abs(data_ptr[i] -
-      // expected_result[i]));
       if (std::abs(data_ptr[i] - expected_result[i]) > epsilon) {
         failed_tests += 1;
         failed_test_ids.push_back(test_name);
@@ -73,514 +72,9 @@ public:
   }
 };
 
-void testTensorHgemm(UnitTestResults &unitTestResults) {
-
-  printf("***** TensorHgemm ***** \n\n");
-  void *lhs_ptr =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
-  struct Tensor *lhs = (struct Tensor *)lhs_ptr;
-  fillTensorWithOnes(lhs);
-
-  float *data_arr = (float *)lhs->host_data;
-  for (int i = 0; i < lhs->num_elems; i++) {
-    data_arr[i] = (i / 4) + 1;
-  }
-
-  void *rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3);
-  fillTensorWithOnes(rhs);
-
-  void *output = tensorHalfGemm(lhs, rhs);
-  convertToFP32((struct Tensor *)output);
-
-  printTensorValues(output);
-
-  const float expected_result[15] = {4,  4,  4,  8,  8,  8,  12, 12,
-                                     12, 16, 16, 16, 20, 20, 20};
-
-  unitTestResults.evalTestResult((Tensor *)output, expected_result, 15, 0.01,
-                                 "Hgemm");
-}
-
-void testTensorSgemm(UnitTestResults &unitTestResults) {
-
-  printf("***** TensorSgemm ***** \n\n");
-  void *lhs_ptr =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 5, 4, 1, 1);
-  struct Tensor *lhs = (struct Tensor *)lhs_ptr;
-  fillTensorWithOnes(lhs);
-
-  float *data_arr = (float *)lhs->host_data;
-  for (int i = 0; i < lhs->num_elems; i++) {
-    data_arr[i] = (i / 4) + 1;
-  }
-
-  void *rhs = create4DTensor(CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3);
-  fillTensorWithOnes(rhs);
-
-  void *output = tensorGemmGPU(lhs, rhs);
-  printTensorValues(output);
-
-  const float expected_result[15] = {4,  4,  4,  8,  8,  8,  12, 12,
-                                     12, 16, 16, 16, 20, 20, 20};
-
-  unitTestResults.evalTestResult((Tensor *)output, expected_result, 15, 0.01,
-                                 "Sgemm");
-}
-
-void testTensorConcatAndSplit() {
-
-  int conv_mode = 1;         // CROSS_CORRELATION mode
-  int compute_precision = 0; // floating point precision
-
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3);
-  fillWithOnesAndTwos(input);
-  void **splits = tensorSplit(input, 2, 1);
-
-  void *conv2W =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);
-  fillTensorWithOnes(conv2W);
-
-  void **conv2fils = tensorSplit(conv2W, 2, 0);
-
-  void *conv2a_out = tensorConvolution(splits[0], conv2fils[0], 0, 0, 1, 1,
-                                       conv_mode, compute_precision);
-  printTensorDims(conv2a_out);
-
-  void *conv2b_out = tensorConvolution(splits[1], conv2fils[1], 0, 0, 1, 1,
-                                       conv_mode, compute_precision);
-  printTensorDims(conv2b_out);
-
-  void *conv2_outs[2];
-  conv2_outs[0] = conv2a_out;
-  conv2_outs[1] = conv2b_out;
-
-  void *conv2_concat_out = tensorConcat(conv2_outs, 2, 1);
-  printTensorDims(conv2_concat_out);
-  printTensorValues(conv2_concat_out);
-}
-
-void testLRN() {
-
-  void *input =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 20, 20, 20, 20);
-  fillTensorWithOnes(input);
-
-  unsigned LRN_window = 5;
-  double LRN_alpha = 2e-05;
-  printf("LRN_alpha = %f \n", LRN_alpha);
-
-  double LRN_beta = 0.75;
-  double LRN_k = 1.0;
-
-  // TEST-point - Compare TF vs CUDNN
-  void *lrn1out = tensorLRN(input, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn1out);
-  dumpWeightsToFile("tensors_out/lrn1_test.out", lrn1out);
-
-  void *input2 =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 7, 7, 7, 7);
-  fillTensorWithOnes(input2);
-
-  LRN_window = 5;
-  LRN_alpha = 0.5 * LRN_window;
-
-  LRN_beta = 0.75;
-  LRN_k = 1.0;
-
-  void *lrn2out = tensorLRN(input2, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn2out);
-  dumpWeightsToFile("tensors_out/lrn2_test.out", lrn2out);
-}
-
-void testTensorAdd() {
-
-  // Tensor add with equal dimensions
-  void *x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2);
-  void *bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 2, 2);
-  fillTensorWithOnes(x);
-  fillTensorWithOnes(bias);
-
-  printTensorValues(x);
-  printTensorValues(bias);
-
-  tensorAdd(x, bias);
-  printTensorValues(x);
-
-  // Tensor addd with matching channel dimension
-  void *x2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 2, 2);
-  void *bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 1, 1);
-  fillTensorWithOnes(x2);
-  fillTensorWithOnes(bias2);
-
-  tensorAdd(x2, bias2);
-  printTensorValues(x2);
-}
-
-void testTensorConv() {
-
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
-  void *filter =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3);
-
-  fillTensorWithOnes(input);
-  fillTensorWithOnes(filter);
-
-  int conv_mode = 1;         // NOTE: uses CROSS_CORRELATION
-  int compute_precision = 0; // floating point precision for conv
-
-  void *conv_out = tensorConvolution(input, filter, 0, 0, 1, 1, conv_mode,
-                                     compute_precision);
-  printTensorValues(conv_out);
-}
-
-void testTensorHalfConv() {
-
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
-  void *filter =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 2, 3, 3);
-
-  fillTensorWithOnes(input);
-  fillTensorWithOnes(filter);
-
-  int conv_mode = 1;         // NOTE: uses CROSS_CORRELATION
-  int compute_precision = 0; // floating point precision for conv
-
-  void *conv_out = tensorHalfConvolution(input, filter, 0, 0, 1, 1, conv_mode,
-                                         compute_precision);
-  printTensorValues(conv_out);
-}
-
-void testTensorGroupConv() {
-
-  // NOTE: The input channel count value (param2 to Tensor and Filter) must be
-  // the same
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
-  void *filter =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 3, 3);
-
-  // FIXIT: fillTensor* calls should be replaced with initTensorValue(tenosor,
-  // val)
-  fillTensorWithOnes(input);
-  fillTensorWithOnes(filter);
-
-  int conv_mode = 1; // NOTE: uses CROSS_CORRELATION
-  int conv_groups = 2;
-
-  void *conv_out =
-      tensorConvolution(input, filter, 0, 0, 1, 1, conv_mode, conv_groups);
-  printTensorValues(conv_out);
-}
-
-void testTensorHalfGroupConv() {
-
-  // NOTE: The input channel count value (param2 to Tensor and Filter) must be
-  // the same
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 2, 4, 4);
-  void *filter =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 3, 3);
-
-  fillTensorWithOnes(input);
-  fillTensorWithOnes(filter);
-
-  int conv_mode = 1; // NOTE: uses CROSS_CORRELATION
-  int conv_groups = 2;
-
-  void *conv_out =
-      tensorConvolution(input, filter, 0, 0, 1, 1, conv_mode, conv_groups);
-
-  convertToFP32((struct Tensor *)conv_out);
-
-  printTensorValues(conv_out);
-}
-
-void testTensorPooling() {
-
-  void *x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 4, 4);
-  fillTensorWithOnes(x);
-
-  float *data_arr = (float *)((Tensor *)x)->host_data;
-  for (int i = 0; i < ((Tensor *)x)->num_elems; i += 4) {
-    data_arr[i] = i;
-  }
-
-  void *output = tensorPooling(x, 0, 2, 2, 0, 0, 2, 2);
-  printTensorValues(output);
-}
-
-void testTensorHalfPooling() {
-
-  void *x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 4, 4);
-  fillTensorWithOnes(x);
-
-  float *data_arr = (float *)((Tensor *)x)->host_data;
-  for (int i = 0; i < ((Tensor *)x)->num_elems; i += 4) {
-    data_arr[i] = i;
-  }
-
-  void *output = tensorPooling(x, 0, 2, 2, 0, 0, 2, 2);
-  convertToFP32((struct Tensor *)output);
-
-  printTensorValues(output);
-}
-
-void testTensorBatchNorm() {
-
-  void *x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 2, 2);
-  fillTensorWithVal(x, 3);
-
-  void *gamma = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(gamma, 1);
-
-  void *beta = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(beta, 0);
-
-  void *mean = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(mean, 1);
-
-  void *variance =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(variance, 1);
-
-  double epsilon = 1;
-  // NOTE: result = X - mean / sqrt(epsilon + variance)
-  void *output = tensorBatchNorm(x, gamma, beta, mean, variance, 1);
-
-  printTensorValues(output);
-}
-
-void testTensorHalfBatchNorm() {
-
-  void *x = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 2, 2);
-  fillTensorWithVal(x, 3);
-
-  void *gamma = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(gamma, 1);
-
-  void *beta = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(beta, 0);
-
-  void *mean = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(mean, 1);
-
-  void *variance =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 1, 1);
-  fillTensorWithVal(variance, 1);
-
-  double epsilon = 1;
-  // NOTE: result = X - mean / sqrt(epsilon + variance)
-  void *output = tensorBatchNorm(x, gamma, beta, mean, variance, 1);
-  convertToFP32((struct Tensor *)output);
-
-  printTensorValues(output);
-}
-
-void testTensorRelu() {
-
-  // NOTE: 2nd dim of bias and d2*d3*d4 for the input tensor MUST match
-  printf("***** TensorRelu ***** \n\n");
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 1, 2, 2);
-  fillTensorWithNegOnes(input);
-
-  void *output = tensorRelu(input);
-  printTensorValues(output);
-}
-
-void testTensorSoftmax() {
-
-  printf("***** TensorSoftmax ***** \n\n");
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 4, 1, 1);
-
-  float *host_ptr = (float *)((struct Tensor *)input)->host_data;
-  host_ptr[0] = 0.1;
-  host_ptr[1] = 0.2;
-  host_ptr[2] = 0.3;
-  host_ptr[3] = 0.4;
-  host_ptr[4] = 0.5;
-  host_ptr[5] = 0.6;
-  host_ptr[6] = 0.7;
-  host_ptr[7] = 2.5;
-
-  void *output = tensorSoftmax(input);
-  printTensorValues(output);
-}
-
-void testSoftmaxOutput(void *output_ptr) {
-
-  struct Tensor *output = (struct Tensor *)output_ptr;
-
-  size_t batch_dim = output->dims.dim_sizes[0];
-  size_t channels = output->dims.dim_sizes[1];
-
-  float *data = (float *)output->host_data;
-  for (int i = 0; i < batch_dim; i++) {
-    float sum = 0.0;
-    for (int j = 0; j < channels; j++) {
-      sum += data[i * channels + j];
-    }
-    printf("output_sum = %f \n", sum);
-  }
-}
-
-void testPromiseError() {
-
-  printf("***** TensorQuantize ***** \n\n");
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 6, 1, 1);
-  float *host_ptr = (float *)((struct Tensor *)input)->host_data;
-
-  void *gold_tensor =
-      create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 6, 1, 1);
-  float *gold_ptr = (float *)((struct Tensor *)gold_tensor)->host_data;
-
-  gold_ptr[0] = -1;
-  gold_ptr[1] = -2;
-  gold_ptr[2] = -3;
-  gold_ptr[3] = -4;
-  gold_ptr[4] = -5;
-  gold_ptr[5] = 0;
-  gold_ptr[6] = 5;
-  gold_ptr[7] = 4;
-  gold_ptr[8] = 3;
-  gold_ptr[9] = 2;
-  gold_ptr[10] = 1;
-  gold_ptr[11] = 1;
-
-  int num_elems = 12;
-  int num_runs = 1000;
-
-  float *result_ptr = (float *)malloc(sizeof(float) * num_elems);
-
-  for (int swing = 1; swing <= 7; swing++) {
-
-    for (int j = 0; j < num_elems; j++) {
-      result_ptr[j] = 0;
-    }
-
-    float error_sum = 0.0;
-
-    for (int i = 0; i < 1000; i++) {
-      host_ptr[0] = -1;
-      host_ptr[1] = -2;
-      host_ptr[2] = -3;
-      host_ptr[3] = -4;
-      host_ptr[4] = -5;
-      host_ptr[5] = 0;
-      host_ptr[6] = 5;
-      host_ptr[7] = 4;
-      host_ptr[8] = 3;
-      host_ptr[9] = 2;
-      host_ptr[10] = 1;
-      host_ptr[11] = 1;
-
-      void *error_out = addPromiseError(input, swing);
-      // printTensorValues(error_out);
-
-      // Move result data back to the host
-      hpvm_request_tensor(input, 0);
-      float *error_out_ptr = (float *)((struct Tensor *)input)->host_data;
-
-      for (int j = 0; j < num_elems; j++) {
-        result_ptr[j] += error_out_ptr[j];
-        error_sum +=
-            (error_out_ptr[j] - gold_ptr[j]) * (error_out_ptr[j] - gold_ptr[j]);
-      }
-    }
 
-    printf("\n\n - Swing %d results : \n", swing);
-    for (int j = 0; j < num_elems; j++) {
-      result_ptr[j] = result_ptr[j] / num_runs;
-      printf(" %f ", result_ptr[j]);
-    }
-
-    printf("mean_error = %f \n", error_sum / num_runs);
 
-    printf(" \n");
-  }
-}
-
-void testQuantization() {
-
-  printf("***** TensorQuantize ***** \n\n");
-  void *input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 2, 6, 1, 1);
-
-  float *host_ptr = (float *)((struct Tensor *)input)->host_data;
-  host_ptr[0] = -0.1;
-  host_ptr[1] = -25;
-  host_ptr[2] = 0.2;
-  host_ptr[3] = -0.4;
-  host_ptr[4] = 1.7;
-  host_ptr[5] = -2.9;
-  host_ptr[6] = 0.7;
-  host_ptr[7] = 0.99;
-  host_ptr[8] = 7;
-  host_ptr[9] = 7.2;
-  host_ptr[10] = 2.5;
-  host_ptr[11] = 3;
-
-  void *quantize_result1 = quantizeTensorPromise(input, -4, 6);
-
-  printf("\n ** quantizing with range min = %d max = %d \n", -4, 6);
-  printTensorValues(quantize_result1);
-
-  host_ptr[0] = -0.1;
-  host_ptr[1] = -25;
-  host_ptr[2] = 0.2;
-  host_ptr[3] = -0.4;
-  host_ptr[4] = 1.7;
-  host_ptr[5] = -2.9;
-  host_ptr[6] = 0.7;
-  host_ptr[7] = 0.99;
-  host_ptr[8] = 7;
-  host_ptr[9] = 7.2;
-  host_ptr[10] = 2.5;
-  host_ptr[11] = 3;
-
-  void *quantize_result2 = quantizeTensorPromise(input, -2, 2);
-
-  printf("\n ** quantizing with range min = %d max = %d \n", -2, 2);
-  printTensorValues(quantize_result2);
-
-  host_ptr[0] = -0.1;
-  host_ptr[1] = -25;
-  host_ptr[2] = 0.2;
-  host_ptr[3] = -0.4;
-  host_ptr[4] = 1.7;
-  host_ptr[5] = -2.9;
-  host_ptr[6] = 0.7;
-  host_ptr[7] = 0.99;
-  host_ptr[8] = 7;
-  host_ptr[9] = 7.2;
-  host_ptr[10] = 2.5;
-  host_ptr[11] = 3;
-
-  void *quantize_result3 = quantizeTensorPromise(input, -25, 8);
-
-  printf("\n ** quantizing with range min = %d max = %d \n", -25, 8);
-  printTensorValues(quantize_result3);
-
-  printf("\n ** quantizing with range min = %d max = %d \n", -10, 10);
-
-  host_ptr[0] = -0.1;
-  host_ptr[1] = -25;
-  host_ptr[2] = 0.2;
-  host_ptr[3] = -0.4;
-  host_ptr[4] = 1.7;
-  host_ptr[5] = -2.9;
-  host_ptr[6] = 0.7;
-  host_ptr[7] = 0.99;
-  host_ptr[8] = 7;
-  host_ptr[9] = 7.2;
-  host_ptr[10] = 2.5;
-  host_ptr[11] = 3;
 
-  void *quantize_result4 = quantizeTensorPromise(input, -10, 10);
-  printTensorValues(quantize_result4);
-
-  void *quantize_result5 = quantizeTensorPromise(input, -10, 10);
-  printTensorValues(quantize_result5);
-
-  // void* error_out = addPromiseError(quantize_result, 1);
-  // printTensorValues(error_out);
-}
 
 void testSampleFilter() {
 
@@ -726,23 +220,6 @@ void testPerforation(UnitTestResults &unitTestResults) {
       (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 3, 3);
   fillTensorWithVal(filter, 1);
 
-  /*
-  float* host_ptr = (float*) ((struct Tensor*) filter)->host_data;
-  host_ptr[0] = 2;
-  host_ptr[2] = 2;
-  host_ptr[4] = 2;
-  host_ptr[6] = 2;
-  host_ptr[8] = 2;
-  host_ptr[10] = 2;
-  host_ptr[12] = 2;
-  host_ptr[14] = 2;
-  host_ptr[16] = 2;
-  host_ptr[18] = 2;
-  host_ptr[20] = 2;
-  host_ptr[22] = 2;
-  host_ptr[24] = 2;
-  host_ptr[26] = 2;
-  */
 
   testPerforationCalls(input, filter, 0, 0, 1, 1, 1, 2, unitTestResults);
 
@@ -763,7 +240,6 @@ void testSampling() {
   Tensor *input =
       (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 4, 4);
   fillTensorWithVal(input, 1);
-  // fillWithOnesAndTwos(input);
 
   Tensor *filter =
       (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 3, 3, 3);
@@ -982,94 +458,7 @@ void testSampling_1_1(UnitTestResults &unitTestResults) {
   testSamplingCalls(input, filter, 1, 1, 1, 1, 4, "1_1", unitTestResults);
 }
 
-void *testTensorArgMax() {
-
-  Tensor *input =
-      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 4, 3, 1, 1);
-
-  float *host_ptr = (float *)((struct Tensor *)input)->host_data;
-
-  // Input 0
-  host_ptr[0] = 1;
-  host_ptr[1] = 7; // highest - max index = 1
-  host_ptr[2] = 3;
-
-  // Input 1
-  host_ptr[3] = 3;
-  host_ptr[4] = 3;
-  host_ptr[5] = 8; // highest - max index = 2
-
-  // Input 2
-  host_ptr[6] = 2;
-  host_ptr[7] = 5;
-  host_ptr[8] = 9; // highest - max index = 2
-
-  // Input 3
-  host_ptr[9] = 11; // highest - max index = 0
-  host_ptr[10] = 2;
-  host_ptr[11] = 8;
-
-  void *argmax_out = tensorArgMax(input);
-
-  // Expect Output of call below to be:
-  //   1    2    2    0
-  printTensorValues(argmax_out);
-
-  return argmax_out;
-}
-
-void *testTensorSelect(void *argmax_out) {
-
-  void *select_out = tensorSelect(argmax_out, 2);
-  printf("***** tensorSelect output \n");
-
-  printTensorValues(select_out);
-
-  return select_out;
-}
-
-void testTensorContract(void *select_out) {
-
-  Tensor *input =
-      (Tensor *)create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 4, 4, 1, 1);
-  float *host_ptr = (float *)((struct Tensor *)input)->host_data;
-
-  // Input 0
-  host_ptr[0] = 1;
-  host_ptr[1] = 1;
-  host_ptr[2] = 1;
-  host_ptr[3] = 1;
-
-  // Input 1
-  host_ptr[4] = 2;
-  host_ptr[5] = 2;
-  host_ptr[6] = 2;
-  host_ptr[7] = 2;
-
-  // Input 2
-  host_ptr[8] = 3;
-  host_ptr[9] = 3;
-  host_ptr[10] = 3;
-  host_ptr[11] = 3;
-
-  // Input 3
-  host_ptr[12] = 4;
-  host_ptr[13] = 4;
-  host_ptr[14] = 4;
-  host_ptr[15] = 4;
-
-  void *contract_out = tensorContract(input, select_out);
-  printf("***** tensorContract output \n");
-
-  printTensorValues(contract_out);
-}
-
-void testNewTensorOps() {
 
-  void *argmax_out = testTensorArgMax();
-  void *select_out = testTensorSelect(argmax_out);
-  testTensorContract(select_out);
-}
 
 int main() {
 
@@ -1077,44 +466,11 @@ int main() {
 
   UnitTestResults unitTestResults;
 
-  // Function call per unit test
-  testTensorHgemm(unitTestResults);
-  testTensorSgemm(unitTestResults);
-
-  /*
-  testTensorConv();
-  testTensorHalfConv();
-
-  testTensorGroupConv();
-  testTensorHalfGroupConv();
-
-  testTensorBatchNorm();
-  testTensorHalfBatchNorm();
-
-  testTensorPooling();
-  testTensorHalfPooling();
-
-  */
-
   testSampling_3_3(unitTestResults);
   testSampling_1_1(unitTestResults);
-
   testPerforation(unitTestResults);
-
   unitTestResults.printSummary();
 
-  // testTensorError();
-  // testQuantization();
-  // testTensorGemm();
-  // testTensorGemmGPU();
-  // testTensorGemmBias();
-  // testTensorConv2();
-  // testTensorConv3();
-  // testLRN();
-  // testSampleFilter();
-  // testNewTensorOps();
-  // testQuantization();
-  // testPromiseError();
 
   return 0;
 }