diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
index 1f6dd875ffa6b39ab57609d7690c9a9ad3944b44..fa252a3e0ce063697d56e771afbfbde69d0c5641 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm.cpp.in
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 {% for node in nodes %}
diff --git a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
index 94a8e0a534c04b323b4b66f369ab2d624a2a745f..8074704ece0988d7897c1e93b41f1ea3c43deb35 100644
--- a/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
+++ b/hpvm/projects/torch2hpvm/torch2hpvm/template_hpvm_inspect.cpp.in
@@ -2,7 +2,6 @@
 #include <string>
 #include <array>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 // For writing binary to file descriptors
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
index 9f34317d34157d57468c60cb854828b5c54f1cde..487adc4fa58e0b1ad13402156fdbcbdbe6026aea 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
@@ -12,7 +12,7 @@ function(compile_hpvm_c target_name src_filepath codegen_target)
     DEPENDS ${generated_file_path} hpvm-clang
     COMMAND hpvm-clang
       ${generated_file_path} ${output_bin_path} -O3 -fno-exceptions
-      -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN}
+      -t ${codegen_target} ${ARGN}
   )
   add_custom_target(${target_name} DEPENDS ${output_bin_path})
   set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE)
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index 39f49784d76470c4e0bab213127369806e1e2531..2faf1413bcdb7c87e280107d38913ae86740a414 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
index dafd1a6ae084c4e1bf819ce1ac94e667c696eb24..bca6ca47cd48015524b496b90219f24e1f27ddb9 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index 64350c590bb181fa4eaab4b2bf5fb37f69e11c09..d274d52ec18af99393f47d9fdb69b0b593dcbefc 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
index 72af2ff4a1b33aabac427d203101c32c4a7403c7..e82985d04fea11c1d30079e4eacbbee81c95080a 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index 37e7a34a51a14b6903d549f271d3c0c83822fec8..c058e913c9f7c5bca6eb304759a380d319495caf 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -1,7 +1,6 @@
 #include <config.h>
 #include <hpvm.h>
 #include <string>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 
 void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) {
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 1206d7bac4b9dcff2b4cfd7183f4a3e5f65d73d9..26e717fd732567eb9e6b97f19c60428e564fc9e5 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index d7ab4238ebac5598b92c432aced85a602bb5ce89..8185d9dc69b6899cad46833d71d18be01653bfb3 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 26acc65a99287ea9f20e037dd996635315d76e48..a0cd32151e5743d51df34edbe041e0fe8485aced 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index 5f8c63dbfbfb800dc6f60f9ed9a6108dee0a9a48..77b448d81d1b352f8ac4ee9e3fc943e69f466772 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
index 2070089053ef0b6e7e0ca33c2c6cc4cea17b8e29..adb140bd699e74be7199f54888ee4249e5515004 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index 5b580f26821e67cc96c8347e485b792f40105176..ef94b055bd6a741405c4c9da55958143d3b8c4d1 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1,7 +1,6 @@
 
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
index 735e2c9abab91f00560faa5496e234321027b82c..ecfa22957352ca2c418c5beb9b041762da9b6de9 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
@@ -1,7 +1,6 @@
 
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index 160563064cc47effd463c4915b0c7f0d93bff56f..37a4111411229602ca18f806c2186af54728081e 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index c5cf2cb3a0177a5cce9ad0cf460484e63ded0ecd..1ac5141bca54d7dc60bb63c09cde9dcb8f8c6d32 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index bec6139c2d089e90d09fa239e1b15c9a835fd4ea..c1de0703df94b3f27dfd55b0379377ecf5f0edbe 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index 4fa7d5c121bacff122821fe983ed443e3c6db249..7bda1213358d0c37d16623425bf19bace4d3a715 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 8666030fba4390d29d9324f5a5c7d60324325f05..bee78428df49c52f06bfa618afd7920d113e1647 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 6d01caa3b7c0875cff4f3e16131ddd09195e92b7..c12855437b28686528ff4c916a987bfa7b2f280e 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index b1b2b4f2e312b6372e10a2fce3ef12eab2dddded..b046f4255185e47b44be1a78ca29c05189fc894b 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index eb29e45805671072428318412f27b05d0da90199..b06c992f3c2108544676c6e7f27810e3ef7244fc 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -1,6 +1,5 @@
 #include <string>
 #include <hpvm.h>
-#include <tensorTypes.h>
 #include <tensorUtils.h>
 #include <config.h>
 
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h
deleted file mode 100644
index 726080efe7e1a06363e7fca191f9708219d5baeb..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorTypes.h
+++ /dev/null
@@ -1,39 +0,0 @@
-
-#ifndef TYPES_HEADER
-#define TYPES_HEADER
-
-
-/*struct Dimension_t{
-  int num_dims;
-  size_t* dim_sizes;
-};
-
-
-struct Tensor_t{
-  int tensor_id; // used for indexing (in the tensor runtime)
-  int data_type; // {float_type, double_type, half_type, int_type}
-  int data_format; // {nchw, nhwc}
-  void* host_data;
-  size_t num_elems; // Total elements
-  size_t size_in_bytes; // Total size in bytes
-  struct Dimension_t dims;
-};
-
-
-enum Tensor_type_t{
-  float_type,
-  double_type,
-  half_type,
-  int_type
-};
-
-
-// NOTE: Currently only NCHW is supported due to limited cuDNN support
-enum Tensor_format_t{
-  nchw,
-  nhwc 
-};
-
-*/
-
-#endif
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
deleted file mode 100644
index 1d5ac7d908b0990f21de885c645786997640264c..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h
+++ /dev/null
@@ -1,758 +0,0 @@
-
-// Header guards
-#ifndef UTILS_HEADER
-#define UTILS_HEADER
-
-#include <sstream>
-#include <vector>
-#include <bits/stdc++.h>
-#include <tensor_runtime.h>
-#include <tensor.h>
-#include <cmath>
-
-std::vector<float> run_accuracies;
-
-void printTensorInfo(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  if (tensor->gpu_data != NULL) {
-    printf("Successful cudaMalloc \n");
-  }
-
-  printf("tensor dims = %d \n", tensor->dims.num_dims);
-  printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]);
-  printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]);
-  printf("num_elems = %lu \n", tensor->num_elems);
-}
-
-// FIXIT: Move this to debug.h and include in all files
-void dumpWeightsToFile(char *file_name, void *weights_ptr) {
-
-  struct Tensor *weights = (Tensor *)weights_ptr;
-  // Move data back to host
-  hpvm_request_tensor(weights, 0);
-
-  FILE *fp = fopen(file_name, "wb");
-  if (fp == NULL) {
-    printf("File %s could not be created. Check if directory exists \n",
-           file_name);
-    abort();
-  }
-
-  // printf("size_in_bytes = %lu \n", weights->size_in_bytes);
-  size_t bytes_written =
-      fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
-  // printf("bytes_written = %lu \n", bytes_written);
-  fclose(fp);
-}
-
-void fillTensorWithOnes(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = 1.0;
-    }
-  }
-}
-
-void fillWithOnesAndTwos(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems / 2; i++) {
-      data_arr[i] = 1.0;
-    }
-
-    for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) {
-      data_arr[i] = 2.0;
-    }
-  }
-}
-
-void fillTensorWithVal(void *tensor_ptr, float target_value) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = target_value;
-    }
-  }
-}
-
-void fillTensorWithNegOnes(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = -1.0;
-    }
-  }
-}
-
-void fillTensorVals(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-  // initialization is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      data_arr[i] = i + 1;
-    }
-  }
-}
-
-void printTensorValues(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  // printing is specific to the floating point type
-  if (tensor->data_type == CUDNN_DATA_FLOAT) {
-    float *data_arr = (float *)tensor->host_data;
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      printf("%f,", data_arr[i]);
-    }
-  }
-
-  printf("\n");
-}
-
-void printTensorDims(void *tensor_ptr) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  printf("Num_elems = %lu \n", tensor->num_elems);
-  for (int i = 0; i < tensor->dims.num_dims; i++) {
-    printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
-  }
-}
-
-void compareTensors(void *tensor1_ptr, void *tensor2_ptr) {
-
-  struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr;
-  struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr;
-
-  hpvm_request_tensor(tensor1, 0);
-  hpvm_request_tensor(tensor2, 0);
-
-  float *tensor_data1 = (float *)tensor1->host_data;
-  float *tensor_data2 = (float *)tensor2->host_data;
-
-  for (unsigned int i = 0; i < tensor1->num_elems; i++) {
-    if (tensor_data1[i] != tensor_data2[i]) {
-      printf("Tensor data mismatch at index %d \n", i);
-      abort();
-    }
-  }
-}
-
-void compareValues(void *tensor_ptr, float *data, size_t num_elems) {
-
-  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
-
-  hpvm_request_tensor(tensor, 0);
-
-  float *tensor_data = (float *)tensor->host_data;
-  for (unsigned int i = 0; i < num_elems; i++) {
-    if (tensor_data[i] != data[i]) {
-      printf("Tensor data mismatch");
-      abort();
-    }
-  }
-}
-
-void *readInputTensor(const char *file_name, int data_type, int dim1_size,
-                      int dim2_size, int dim3_size, int dim4_size) {
-
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  uint8_t *file_data = (uint8_t *)malloc(sizeof(char) * num_elems);
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  int file_header_size = 16;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(file_data, 1, sizeof(uint8_t) * num_elems, file);
-
-  fclose(file);
-
-  for (size_t i = 0; i < num_elems; ++i) {
-    tensor_data[i] = (float)file_data[i] / 255.0f;
-  }
-
-  // NOTE: Using NCHW format
-  struct Tensor *input = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(input, tensor_data, size_in_bytes);
-  //  compareValues(input, tensor_data, num_elems);
-
-  return input;
-}
-
-//*** FIXIT: Move this to CPU-only
-struct Tensor *readTrainedWeightsCPU(const char *file_name, int data_type,
-                                     int dim1_size, int dim2_size,
-                                     int dim3_size, int dim4_size) {
-
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  int file_header_size = 0;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, bytes_read);
-
-  fclose(file);
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  // compareValues(weights, tensor_data, num_elems);
-  free(tensor_data);
-
-  return weights;
-}
-
-struct Tensor *readTrainedWeights(const char *file_name, int data_type,
-                                  long int dim1_size, long int dim2_size,
-                                  long int dim3_size, long int dim4_size) {
-
-  // FIXIT: Don't assume floating point types
-  int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  printf("size_in_bytes  = %lu \n", size_in_bytes);
-
-  int file_header_size = 0;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes,
-  // bytes_read);
-
-  fclose(file);
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  // compareValues(weights, tensor_data, num_elems);
-  free(tensor_data);
-
-  return weights;
-}
-
-struct Tensor *readInputBatch(const char *file_name, long data_type, long start,
-                              long end, long dim2_size, long dim3_size,
-                              long dim4_size) {
-
-  long int dim1_size = end - start;
-  // FIXIT: Don't assume floating point types
-  long int type_size = 4; // NOTE: Assuming floating point tensors
-  long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size;
-  long int size_in_bytes =
-      type_size * dim1_size * dim2_size * dim3_size * dim4_size;
-  float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  long int file_header_size =
-      type_size * start * dim2_size * dim3_size * dim4_size;
-
-  FILE *file = fopen(file_name, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting... \n", file_name);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
-
-  fclose(file);
-
-  // printf ("FIXED input BATCH read \n");
-
-  struct Tensor *weights = (struct Tensor *)create4DTensor(
-      data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
-
-  initTensorData(weights, tensor_data, size_in_bytes);
-  free(tensor_data);
-
-  return weights;
-}
-
-uint8_t *readLabels(const char *labels_file, int num_labels) {
-
-  uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-uint32_t *readLabels3(const char *labels_file, int num_labels) {
-
-  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-uint8_t *readLabelsBatch(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint8_t) * start;
-
-  uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
-
-  fclose(file);
-
-  // printf("--labels bytes_read = %lu \n", bytes_read);
-  return labels;
-}
-
-uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint32_t) * start;
-
-  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
-  FILE *file = fopen(labels_file, "rb");
-  if (file == NULL) {
-    printf("Data file %s is not found. Aborting...\n", labels_file);
-    abort();
-  }
-
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-
-  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
-
-  fclose(file);
-
-  return labels;
-}
-
-void computeAccuracy(const char *labels_file, int num_labels,
-                     void *result_ptr) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  uint8_t *labels = readLabels(labels_file, num_labels);
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t channels = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  for (int i = 0; i < batch_dim; i++) {
-    int chosen = 0;
-    for (int id = 1; id < 10; ++id) {
-      if (data[i * channels + chosen] < data[i * channels + id])
-        chosen = id;
-    }
-
-    // printf("chosen = %d, label = %d \n", chosen, labels[i]);
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-    fclose(fp);
-  }
-}
-
-// NOTE: batch_size and num_classes are Unused arguments
-float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr,
-                       size_t num_classes = 10) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  num_classes = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes);
-
-  for (unsigned int i = 0; i < batch_dim; i++) {
-
-    int chosen = 0;
-    for (int id = 1; id < num_classes; ++id) {
-      if (data[i * num_classes + chosen] < data[i * num_classes + id])
-        chosen = id;
-    }
-
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-float computeAccuracy3(uint32_t *labels, void *result_ptr) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t num_classes = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes);
-
-  for (int i = 0; i < batch_dim; i++) {
-
-    int chosen = 0;
-    for (int id = 1; id < num_classes; ++id) {
-      if (data[i * num_classes + chosen] < data[i * num_classes + id])
-        chosen = id;
-    }
-
-    if (chosen != labels[i])
-      num_errors++;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-struct ClassProb {
-  float prob;
-  int index;
-};
-
-bool descendFloatComp(ClassProb obj1, ClassProb obj2) {
-  return obj1.prob > obj2.prob;
-}
-
-float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr,
-                          unsigned num_classes = 10) {
-
-  struct Tensor *result = (struct Tensor *)result_ptr;
-
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t channels = result->dims.dim_sizes[1];
-  float *data = (float *)result->host_data;
-  int num_errors = 0;
-
-  printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels);
-
-  for (int i = 0; i < num_labels; i++) {
-
-    std::vector<ClassProb> elem_probs;
-    for (int id = 0; id < num_classes; ++id) {
-      ClassProb cProb;
-      cProb.prob = data[i * channels + id];
-      cProb.index = id;
-      elem_probs.push_back(cProb);
-    }
-
-  std:
-    sort(elem_probs.begin(), elem_probs.end(), descendFloatComp);
-    // Check if any of top-5 predictions matches
-    bool matched = false;
-    for (int j = 0; j < 5; j++) {
-      ClassProb cProb = elem_probs[j];
-      if (cProb.index == labels[i])
-        matched = true;
-    }
-
-    if (!matched)
-      num_errors += 1;
-  }
-
-  float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  printf("****** Accuracy = %f \n\n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  return accuracy;
-}
-
-void dumpFinalAccuracy(float accuracy) {
-
-  printf("\n\n **** Final Accuracy = %f \n", accuracy);
-
-  FILE *fp = fopen("final_accuracy", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << accuracy;
-    std::string print_str = ss.str();
-
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-
-  run_accuracies.push_back(accuracy);
-}
-
-void dumpAvgPSNR(float avg_psnr) {
-
-  FILE *fp = fopen("avg_psnr", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << avg_psnr;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-}
-
-void dumpPSNRStd(float psnr_std) {
-
-  FILE *fp = fopen("psnr_std.txt", "w+");
-  if (fp != NULL) {
-    std::ostringstream ss;
-    ss << std::fixed << psnr_std;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-  }
-
-  fclose(fp);
-}
-
-void dumpExecutionAccuracies() {
-
-  FILE *fp = fopen("run_accuracies.txt", "w+");
-  if (fp != NULL) {
-    for (int i = 0; i < run_accuracies.size(); i++) {
-      float accuracy = run_accuracies[i];
-      std::ostringstream ss;
-      ss << std::fixed << accuracy;
-      std::string print_str = ss.str();
-      fwrite(print_str.c_str(), 1, print_str.length(), fp);
-      fwrite("\n", 1, 1, fp);
-    }
-  }
-
-  fclose(fp);
-}
-
-float readPSNRFromFile(const char *file_name) {
-
-  float psnr;
-  FILE *pFile = fopen(file_name, "r");
-  if (pFile == NULL) {
-    printf("ERROR: psnr.txt not found! \n");
-    abort();
-  }
-
-  fscanf(pFile, "%f", &psnr);
-  printf("**** PSNR read = %f \n\n", psnr);
-  return psnr;
-}
-
-float computePSNRViolation(void *gold_ptr, void *approx_ptr,
-                           float PSNR_threshold) {
-
-  PSNR_threshold = readPSNRFromFile("psnr.txt");
-  std::vector<float> psnr_list;
-
-  struct Tensor *gold_tensor = (struct Tensor *)gold_ptr;
-  struct Tensor *approx_tensor = (struct Tensor *)approx_ptr;
-
-  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
-  size_t batch_dim = dim_sizes[0];
-  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
-
-  printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size);
-
-  float *gold_data = (float *)gold_tensor->host_data;
-  float *approx_data = (float *)approx_tensor->host_data;
-
-  FILE *fp = fopen("img_psnr.txt", "w+");
-
-  float sum_psnr = 0.0;
-  int num_errors = 0;
-  for (size_t i = 0; i < batch_dim; i++) {
-    float mse_sum = 0.0;
-    float max_val = -999999;
-    size_t offset = i * image_size;
-
-    for (size_t j = 0; j < image_size; j++) {
-      float diff = gold_data[offset + j] - approx_data[offset + j];
-      float diff_square = diff * diff;
-      mse_sum += diff_square;
-
-      if (max_val < gold_data[offset + j]) {
-        max_val = gold_data[offset + j];
-      }
-    }
-
-    mse_sum = mse_sum / image_size;
-    float psnr = 20 * log10(255 / sqrt(mse_sum));
-
-    sum_psnr += psnr;
-    if (psnr < PSNR_threshold)
-      num_errors += 1;
-
-    printf("PSNR value = %f \n", psnr);
-    psnr_list.push_back(psnr);
-
-    std::ostringstream ss;
-    ss << std::fixed << psnr;
-    std::string print_str = ss.str();
-    fwrite(print_str.c_str(), 1, print_str.length(), fp);
-    fwrite("\n", 1, 1, fp);
-  }
-
-  float violation_rate = (num_errors * 1.0) / batch_dim * 100.0;
-  printf("*** violation_rate= %f \n\n", violation_rate);
-
-  float avg_psnr = sum_psnr / batch_dim;
-  printf("*** avg_psnr =  %f \n\n", avg_psnr);
-  dumpAvgPSNR(avg_psnr);
-
-  float success_rate = 100.0 - violation_rate;
-  dumpFinalAccuracy(success_rate);
-
-  fclose(fp);
-
-  float var = 0.0;
-  for (size_t i = 0; i < batch_dim; i++) {
-    var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr);
-  }
-
-  var /= batch_dim;
-  float std = sqrt(var);
-
-  dumpPSNRStd(std);
-
-  return violation_rate;
-}
-
-void dumpOutput(void *output_ptr, const char *file_name) {
-
-  struct Tensor *out_tensor = (struct Tensor *)output_ptr;
-  size_t size_in_bytes = out_tensor->size_in_bytes;
-  printf("** Output size = %lu \n", size_in_bytes);
-
-  float *host_data = (float *)out_tensor->host_data;
-  FILE *fd = fopen(file_name, "w+");
-  fwrite(host_data, 1, size_in_bytes, fd);
-  fclose(fd);
-}
-
-#endif