diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1a5fbf0af68a3f9fd37ba297a58b85291179377b..bd5edbd1a467666f67c66be132b3a9d9bbd2d540 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -12,7 +12,7 @@ cache:
   paths:
     - hpvm/build/
     - hpvm/llvm/
-    - hpvm/test/dnn_benchmarks/model_params/
+  when: always
 
 build:
   stage: build
@@ -24,15 +24,5 @@ build:
     - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release
     - cd ..
   only:
-    changes:
-      - hpvm/scripts/hpvm_installer.py
-
-tests:
-  stage: test
-  tags:
-    - hpvm
-  script:
-    - pwd
-    - source activate hpvm && cd hpvm
-    - ./install.sh -j32 -t "X86" DCMAKE_BUILD_TYPE=Release
-    - cd build && make -j32 check-hpvm-pass
+    - hpvm-release-exp
+    - merge_requests
diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt
index 809a30cfa52e16f436dac4e22843f4c5a3add3d9..fcfaf264a64d52bfe13e0023fe92ad12b7cf2016 100644
--- a/hpvm/CMakeLists.txt
+++ b/hpvm/CMakeLists.txt
@@ -7,11 +7,10 @@ message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
 
 # find_package will use the auxillary cmake/Find*.cmake we provide
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-find_package(CUDNN 7 EXACT REQUIRED)  # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_PATH
+find_package(CUDNN 7 EXACT REQUIRED)  # CUDNN_INCLUDE_PATH, CUDNN_LIBRARY_DIR and CUDNN::cudnn
+find_package(OpenCL REQUIRED)  # Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY}
 
 include_directories(./include/)
-# find_package will use the auxillary cmake/Find*.cmake we provide
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
 # Generate TENSOR_RT_PREFIX into config.h
 set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
diff --git a/hpvm/cmake/FindCUDNN.cmake b/hpvm/cmake/FindCUDNN.cmake
index e5a427f0317a6f3b8f7e7b2cc89fd176fd4362dc..bb4918f704a5fe210f0e25e893e1b33335189add 100644
--- a/hpvm/cmake/FindCUDNN.cmake
+++ b/hpvm/cmake/FindCUDNN.cmake
@@ -10,8 +10,9 @@
 # The following are set after configuration is done:
 #  CUDNN_FOUND
 #  CUDNN_INCLUDE_PATH
-#  CUDNN_LIBRARY_PATH
+#  CUDNN_LIBRARY_DIR
 #
+# It also provides the IMPORTed target CUDNN::cudnn.
 
 include(FindPackageHandleStandardArgs)
 
@@ -45,11 +46,8 @@ endif()
 find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME}
   PATHS ${CUDNN_LIBRARY}
   PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
-# Get director from filename ${CUDNN_LIBRARY_PATH}
-get_filename_component(
-  CUDNN_LIBRARY_PATH
-  "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE
-)
+# Get directory from filename ${CUDNN_LIBRARY_PATH}
+get_filename_component(CUDNN_LIBRARY_DIR "${CUDNN_LIBRARY_PATH}/.." ABSOLUTE)
 
 # This version check is from OpenCV repo: https://github.com/opencv/opencv/blob/master/cmake/FindCUDNN.cmake
 # extract version from the include
@@ -80,4 +78,12 @@ find_package_handle_standard_args(
   VERSION_VAR CUDNN_VERSION
 )
 
+# Define the imported target only when cuDNN was found, and only once:
+# add_library() fails if find_package(CUDNN) runs again where the target already exists.
+if(CUDNN_FOUND AND NOT TARGET CUDNN::cudnn)
+  add_library(CUDNN::cudnn INTERFACE IMPORTED)
+  target_include_directories(CUDNN::cudnn SYSTEM INTERFACE "${CUDNN_INCLUDE_PATH}")
+  target_link_libraries(CUDNN::cudnn INTERFACE "${CUDNN_LIBRARY_PATH}")
+endif()
+
 mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE_DIR CUDNN_LIBRARY)
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h b/hpvm/include/hpvm.h
similarity index 100%
rename from hpvm/test/dnn_benchmarks/hpvm-c/include/hpvm.h
rename to hpvm/include/hpvm.h
diff --git a/hpvm/projects/hpvm-rt/CMakeLists.txt b/hpvm/projects/hpvm-rt/CMakeLists.txt
index 6efd8d3d0a9d86236adc87657fb68b782f3daaa0..ad78c35828cf9be9f66f23301fbe4d207222d4d1 100644
--- a/hpvm/projects/hpvm-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-rt/CMakeLists.txt
@@ -3,8 +3,6 @@ add_definitions(-DNUM_CORES=8)
 SET(CMAKE_C_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang)
 SET(CMAKE_CXX_COMPILER ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clang++)
 SET(CMAKE_CXX_STANDARD 11)
-# Defines ${OpenCL_INCLUDE_DIRS} and ${OpenCL_LIBRARY} if found
-find_package(OpenCL REQUIRED)
 
 # This puts libhpvm-rt.a in lib/ which we don't care about
 # we want ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/hpvm-rt.dir/hpvm-rt.cpp.o
diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index a142d524b69cb605b85c496aa140c806ad258dfd..f6fed2ac296f93bc060fe09b3b889b42ee8c4a1a 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -24,10 +24,8 @@ configure_file(
 # -- Default include directories
 set(
   INCLUDES
-  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-  ${CUDNN_INCLUDE_PATH}
   ./tensor_runtime/include ${CMAKE_CURRENT_BINARY_DIR}/tensor_runtime/include
-  ./dnn_sources/include
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDNN_INCLUDE_PATH}
 )
 
 # Build gpu_profiler and soc_simulator (dependencies)
@@ -39,13 +37,43 @@ add_library(soc_simulator SHARED soc_simulator/promise_timing_model.cpp)
 target_include_directories(soc_simulator PUBLIC soc_simulator/)
 
 # -- Link libraries
-find_package(OpenMP REQUIRED)  # Provides ${OpenMP_CXX_FLAGS}
-set(LINK_DIR CUDNN_LIBRARY_PATH)
-set(LINK_LIBS gpu_profiler soc_simulator stdc++fs cudnn curand cublas)
+find_package(OpenMP REQUIRED)  # Provides ${OpenMP_CXX_FLAGS} and OpenMP::OpenMP_CXX
+# This will use the CUDA found by CUDA language support in the root CMake,
+# but it exports the CUDA::* targets (used below) so we can freely add libraries to link to.
+find_package(CUDAToolkit REQUIRED)
+set(
+  LINK_LIBS
+  gpu_profiler soc_simulator
+  CUDA::cublas CUDA::curand CUDNN::cudnn
+  OpenMP::OpenMP_CXX
+)
 if(USE_GFLAGS)
   list(APPEND LINK_LIBS gflags)
 endif()
 
+# -- Manually list the directories (TRT_LINK_DIRS) and libraries (TRT_LINK_LIBS)
+# tensor_runtime links to, because CMake doesn't help us do this.
+# This is needed by both approxhpvm.py and the RPATH setting step (below).
+# First, take a guess at the paths to the libraries that are used here.
+# (CMake, why do you make this so difficult?)
+foreach(interface_lib ${LINK_LIBS})
+  get_target_property(actual_libs ${interface_lib} INTERFACE_LINK_LIBRARIES)
+  foreach(actual_lib ${actual_libs})
+    # ${actual_lib} may not be a path, then taking the directory of it should return "".
+    get_filename_component(libdir "${actual_lib}" DIRECTORY)
+    get_filename_component(libname "${actual_lib}" NAME)
+    if(NOT "${libdir}" STREQUAL "")
+      list(APPEND TRT_LINK_DIRS "${libdir}")
+    endif()
+    if(NOT "${libname}" STREQUAL "" AND NOT "${libname}" STREQUAL "actual_libs-NOTFOUND")
+      list(APPEND TRT_LINK_LIBS "${libname}")
+    endif()
+  endforeach()
+endforeach()
+# Dedup, just for shorter compiler arguments.
+list(REMOVE_DUPLICATES TRT_LINK_DIRS)
+list(REMOVE_DUPLICATES TRT_LINK_LIBS)
+
 # -- Definitions
 set(DEFS -DPROMISE_TUNER_ENABLED -DSIMULATION_MODE=true)
 if(USE_GFLAGS)
@@ -77,16 +105,27 @@ endforeach()
 # -- Adding tensor_runtime targets
 function(add_tensor_runtime target_name)
   add_library(${target_name} SHARED ${RUNTIME_SRCS})
-  set_property(TARGET ${target_name} PROPERTY CUDA_ARCHITECTURES 60)
+  set_target_properties(${target_name} PROPERTIES CUDA_ARCHITECTURES 60)
   target_compile_options(
     ${target_name} PRIVATE
     --expt-relaxed-constexpr -maxrregcount 32 -Xcompiler=${OpenMP_CXX_FLAGS}
     $<$<CONFIG:DEBUG>:-lineinfo -Xcompiler=-ggdb>
   )
   target_include_directories(${target_name} PUBLIC ${INCLUDES})
-  target_link_directories(${target_name} PUBLIC ${LINK_DIR})
-  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS} ${OpenMP_CXX_FLAGS})
+  target_link_libraries(${target_name} PUBLIC ${LINK_LIBS})
   target_compile_definitions(${target_name} PRIVATE ${DEFS} ${ARGN})
+
+  # We have to manually set rpath because cmake is not willing to comply...
+  foreach(libdir ${TRT_LINK_DIRS})
+    target_link_options(${target_name} PRIVATE "-Wl,-rpath,${libdir}")
+  endforeach()
+  # Also slap TRT_LINK_DIRS and TRT_LINK_LIBS on this target
+  # so that approxhpvm.py can read them. (we'll create our own properties.)
+  set_target_properties(
+    ${target_name} PROPERTIES
+    TRT_LINK_DIRS "${TRT_LINK_DIRS}"
+    TRT_LINK_LIBS "${TRT_LINK_LIBS}"
+  )
 endfunction(add_tensor_runtime)
 
 # Adding new rule for building a cuDNN runtime library
@@ -101,8 +140,8 @@ add_tensor_runtime(tensor_runtime_online -DONLINE_PROFILING=true -DFP16_tuning=f
 add_dependencies(tensor_runtime_online tensor_runtime)
 
 # Adding rule for the debugging source
-add_executable(unit_tests dnn_sources/src/unit_tests.cc)
-target_link_libraries(unit_tests  tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}  ${OpenMP_CXX_FLAGS})
+add_executable(unit_tests tests/unit_tests.cc)
+target_link_libraries(unit_tests PRIVATE tensor_runtime_online)  # executable: nothing consumes its link interface
 
 # -- Compile tensor_runtime.ll if possible
 if(INDEP_BUILD)
@@ -126,6 +165,8 @@ endif()
 # If some clang-9 is found, create a tensor_runtime.ll from tensor_signatures.cc
 if(CLANG_NAME)
   message(STATUS "Creating tensor_runtime.ll in ${TENSOR_RT_LL_PREFIX}")
+  # Manually add cuda includes because add_custom_command doesn't handle them
+  # (unlike add_library which has CUDA-lang support).
   foreach(dir ${INCLUDES})
     list(APPEND INCLUDE_COMPILER_STRINGS "-I${dir}")
   endforeach()
@@ -136,59 +177,3 @@ if(CLANG_NAME)
     -o ${TENSOR_RT_LL_PREFIX}/tensor_runtime.ll
   )
 endif()
-
-
-#**************** FP32 TensorRT Source Builds *********** 
-
-add_executable(lenet_mnist_fp32 dnn_sources/src/fp32/lenet_mnist.cc)
-target_link_libraries(lenet_mnist_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_cifar10_fp32 dnn_sources/src/fp32/alexnet_cifar10.cc)
-target_link_libraries(alexnet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet2_cifar10_fp32 dnn_sources/src/fp32/alexnet2_cifar10.cc)
-target_link_libraries(alexnet2_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar10_fp32 dnn_sources/src/fp32/vgg16_cifar10.cc)
-target_link_libraries(vgg16_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet18_cifar10_fp32 dnn_sources/src/fp32/resnet18_cifar10.cc)
-target_link_libraries(resnet18_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar100_fp32 dnn_sources/src/fp32/vgg16_cifar100.cc)
-target_link_libraries(vgg16_cifar100_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(mobilenet_cifar10_fp32 dnn_sources/src/fp32/mobilenet.cc)
-target_link_libraries(mobilenet_cifar10_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_imagenet_fp32 dnn_sources/src/fp32/alexnet_imagenet.cc)
-target_link_libraries(alexnet_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_imagenet_fp32 dnn_sources/src/fp32/vgg16_imagenet.cc)
-target_link_libraries(vgg16_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet50_imagenet_fp32 dnn_sources/src/fp32/resnet50_imagenet.cc)
-target_link_libraries(resnet50_imagenet_fp32 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-#********* FP16 TensorRT Source Builds ****** 
-
-add_executable(lenet_mnist_fp16 dnn_sources/src/fp16/lenet_mnist_half.cc)
-target_link_libraries(lenet_mnist_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet_cifar10_fp16 dnn_sources/src/fp16/alexnet_cifar10_half.cc)
-target_link_libraries(alexnet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(alexnet2_cifar10_fp16 dnn_sources/src/fp16/alexnet2_cifar10_half.cc)
-target_link_libraries(alexnet2_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(resnet18_cifar10_fp16 dnn_sources/src/fp16/resnet18_cifar10_half.cc)
-target_link_libraries(resnet18_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar10_fp16 dnn_sources/src/fp16/vgg16_cifar10_half.cc)
-target_link_libraries(vgg16_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(vgg16_cifar100_fp16 dnn_sources/src/fp16/vgg16_cifar100_half.cc)
-target_link_libraries(vgg16_cifar100_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(mobilenet_cifar10_fp16 dnn_sources/src/fp16/mobilenet_half.cc)
-target_link_libraries(mobilenet_cifar10_fp16 tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
similarity index 50%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
rename to hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
index 61fd362afcc665e21a7ba8636c8df778ac95184e..d4961d19b9326daa4571d066dfe2b3177f6a78d4 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensorUtils.h
@@ -3,23 +3,35 @@
 #ifndef UTILS_HEADER
 #define UTILS_HEADER
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
 #include <sstream>
 #include <vector>
 #include <bits/stdc++.h>
 #include <tensor_runtime.h>
 #include <tensor.h>
 #include <cmath>
-#include <string.h>
+
 
 std::vector<float> run_accuracies;
 std::string model_params_path = "../../test/dnn_benchmarks/model_params/";
 
+
+void printTensorInfo(void *tensor_ptr) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+
+  if (tensor->gpu_data != NULL) {
+    printf("Successful cudaMalloc \n");
+  }
+
+  printf("tensor dims = %d \n", tensor->dims.num_dims);
+  printf("dim1_size = %lu \n", tensor->dims.dim_sizes[0]);
+  printf("dim2_size = %lu \n", tensor->dims.dim_sizes[1]);
+  printf("num_elems = %lu \n", tensor->num_elems);
+}
+
 // FIXIT: Move this to debug.h and include in all files
-void dumpWeightsToFile(const char *file_name, void *weights_ptr) {
+void dumpWeightsToFile(const char *file_name, void *weights_ptr) {
+
   struct Tensor *weights = (Tensor *)weights_ptr;
   // Move data back to host
   hpvm_request_tensor(weights, 0);
@@ -31,6 +43,10 @@ void dumpWeightsToFile(const char *file_name, void *weights_ptr) {
     abort();
   }
 
+  // printf("size_in_bytes = %lu \n", weights->size_in_bytes);
+  size_t bytes_written =
+      fwrite(weights->host_data, 1, weights->size_in_bytes, fp);
+  // printf("bytes_written = %lu \n", bytes_written);
   fclose(fp);
 }
 
@@ -58,21 +74,13 @@ void fillWithOnesAndTwos(void *tensor_ptr) {
   // initialization is specific to the floating point type
   if (tensor->data_type == CUDNN_DATA_FLOAT) {
     float *data_arr = (float *)tensor->host_data;
-
-    for (unsigned int i = 0; i < tensor->num_elems; i++) {
-      if (i % 2 == 0)
-        data_arr[i] = 1.0;
-      else
-        data_arr[i] = 2.0;
-    }
-
-    /*for(unsigned int i = 0; i < tensor->num_elems/2; i++){
+    for (unsigned int i = 0; i < tensor->num_elems / 2; i++) {
       data_arr[i] = 1.0;
     }
 
-    for(unsigned int i = tensor->num_elems/2; i < tensor->num_elems; i++){
+    for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) {
       data_arr[i] = 2.0;
-    }*/
+    }
   }
 }
 
@@ -106,6 +114,18 @@ void fillTensorWithNegOnes(void *tensor_ptr) {
   }
 }
 
+void fillTensorVals(void *tensor_ptr) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+  // initialization is specific to the floating point type
+  if (tensor->data_type == CUDNN_DATA_FLOAT) {
+    float *data_arr = (float *)tensor->host_data;
+    for (unsigned int i = 0; i < tensor->num_elems; i++) {
+      data_arr[i] = i + 1;
+    }
+  }
+}
+
 void printTensorValues(void *tensor_ptr) {
 
   struct Tensor *tensor = (struct Tensor *)tensor_ptr;
@@ -113,14 +133,11 @@ void printTensorValues(void *tensor_ptr) {
   hpvm_request_tensor(tensor, 0);
 
   // printing is specific to the floating point type
-  if (tensor->data_type != CUDNN_DATA_FLOAT) {
-    // printf("\n WARNING: The tensor is non-float type tensor \n\n");
-  }
-
-  float *data_arr = (float *)tensor->host_data;
-
-  for (unsigned int i = 0; i < tensor->num_elems; i++) {
-    printf("%f,", data_arr[i]);
+  if (tensor->data_type == CUDNN_DATA_FLOAT) {
+    float *data_arr = (float *)tensor->host_data;
+    for (unsigned int i = 0; i < tensor->num_elems; i++) {
+      printf("%f,", data_arr[i]);
+    }
   }
 
   printf("\n");
@@ -131,11 +148,49 @@ void printTensorDims(void *tensor_ptr) {
   struct Tensor *tensor = (struct Tensor *)tensor_ptr;
 
   printf("Num_elems = %lu \n", tensor->num_elems);
-  for (int i = 0; i < tensor->dims.num_dims; i++) {
+  for (unsigned int i = 0; i < tensor->dims.num_dims; i++) {
     printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]);
   }
 }
 
+void compareTensors(void *tensor1_ptr, void *tensor2_ptr) {
+
+  struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr;
+  struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr;
+
+  hpvm_request_tensor(tensor1, 0);
+  hpvm_request_tensor(tensor2, 0);
+
+  float *tensor_data1 = (float *)tensor1->host_data;
+  float *tensor_data2 = (float *)tensor2->host_data;
+
+  for (unsigned int i = 0; i < tensor1->num_elems; i++) {
+    if (tensor_data1[i] != tensor_data2[i]) {
+      printf("Tensor data mismatch at index %d \n", i);
+      abort();
+    }
+  }
+}
+
+void compareValues(void *tensor_ptr, float *data, size_t num_elems) {
+
+  struct Tensor *tensor = (struct Tensor *)tensor_ptr;
+
+  hpvm_request_tensor(tensor, 0);
+
+  float *tensor_data = (float *)tensor->host_data;
+  for (unsigned int i = 0; i < num_elems; i++) {
+    if (tensor_data[i] != data[i]) {
+      printf("Tensor data mismatch");
+      abort();
+    }
+  }
+}
+
+
+
+
+
 struct Tensor *readTrainedWeights(const char *file_name, int data_type,
                                   long int dim1_size, long int dim2_size,
                                   long int dim3_size, long int dim4_size) {
@@ -146,7 +201,7 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type,
   long int size_in_bytes =
       type_size * dim1_size * dim2_size * dim3_size * dim4_size;
   float *tensor_data = (float *)malloc(sizeof(float) * num_elems);
-  // printf("size_in_bytes  = %lu \n", size_in_bytes);
+  printf("size_in_bytes  = %lu \n", size_in_bytes);
 
   int file_header_size = 0;
 
@@ -157,7 +212,11 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type,
   }
 
   fseek(file, file_header_size, SEEK_CUR); // Skipping the file header
-  fread(tensor_data, 1, size_in_bytes, file);
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+
+  // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes,
+  // bytes_read);
+
   fclose(file);
 
   struct Tensor *weights = (struct Tensor *)create4DTensor(
@@ -170,9 +229,9 @@ struct Tensor *readTrainedWeights(const char *file_name, int data_type,
   return weights;
 }
 
-struct Tensor *readInputBatch(const char *file_name, int data_type,
-                              long int start, long int end, long int dim2_size,
-                              long int dim3_size, long int dim4_size) {
+struct Tensor *readInputBatch(const char *file_name, long data_type,
+			      long start, long end,
+			      long dim2_size, long dim3_size, long dim4_size) {
 
   long int dim1_size = end - start;
   // FIXIT: Don't assume floating point types
@@ -191,9 +250,12 @@ struct Tensor *readInputBatch(const char *file_name, int data_type,
   }
 
   fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  fread(tensor_data, 1, size_in_bytes, file);
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+
   fclose(file);
 
+  // printf ("FIXED input BATCH read \n");
+
   struct Tensor *weights = (struct Tensor *)create4DTensor(
       data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);
 
@@ -203,10 +265,7 @@ struct Tensor *readInputBatch(const char *file_name, int data_type,
   return weights;
 }
 
-uint8_t *readLabelsBatch(const char *labels_file, int start, int end) {
-
-  int num_labels = end - start;
-  int file_header_size = sizeof(uint8_t) * start;
+uint8_t *readLabels(const char *labels_file, int num_labels) {
 
   uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels);
   FILE *file = fopen(labels_file, "rb");
@@ -215,14 +274,30 @@ uint8_t *readLabelsBatch(const char *labels_file, int start, int end) {
     abort();
   }
 
-  fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  fread(labels, 1, sizeof(uint8_t) * num_labels, file);
+  size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file);
+
+  fclose(file);
+
+  return labels;
+}
+
+uint32_t *readLabels3(const char *labels_file, int num_labels) {
+
+  uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels);
+  FILE *file = fopen(labels_file, "rb");
+  if (file == NULL) {
+    printf("Data file %s is not found. Aborting...\n", labels_file);
+    abort();
+  }
+
+  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
+
   fclose(file);
 
-  // printf("--labels bytes_read = %lu \n", bytes_read);
   return labels;
 }
 
+
 uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) {
 
   int num_labels = end - start;
@@ -236,29 +311,31 @@ uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) {
   }
 
   fseek(file, file_header_size, SEEK_SET); // Skipping the file header
-  fread(labels, 1, sizeof(uint32_t) * num_labels, file);
+
+  size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file);
+
   fclose(file);
 
   return labels;
 }
 
-// NOTE: batch_size and num_classes are Unused arguments
-float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr,
-                       size_t num_classes = 10) {
+
+
+float computeAccuracy3(uint32_t *labels, void *result_ptr) {
 
   struct Tensor *result = (struct Tensor *)result_ptr;
 
   size_t batch_dim = result->dims.dim_sizes[0];
-  num_classes = result->dims.dim_sizes[1];
+  size_t num_classes = result->dims.dim_sizes[1];
   float *data = (float *)result->host_data;
   int num_errors = 0;
 
-  printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes);
+  printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes);
 
   for (unsigned int i = 0; i < batch_dim; i++) {
 
     int chosen = 0;
-    for (size_t id = 1; id < num_classes; ++id) {
+    for (unsigned int id = 1; id < num_classes; ++id) {
       if (data[i * num_classes + chosen] < data[i * num_classes + id])
         chosen = id;
     }
@@ -285,27 +362,49 @@ float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr,
   return accuracy;
 }
 
-float computeAccuracy3(uint32_t *labels, void *result_ptr) {
+struct ClassProb {
+  float prob;
+  int index;
+};
+
+bool descendFloatComp(ClassProb obj1, ClassProb obj2) {
+  return obj1.prob > obj2.prob;
+}
+
+float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr,
+                          unsigned num_classes = 10) {
 
   struct Tensor *result = (struct Tensor *)result_ptr;
 
   size_t batch_dim = result->dims.dim_sizes[0];
-  size_t num_classes = result->dims.dim_sizes[1];
+  size_t channels = result->dims.dim_sizes[1];
   float *data = (float *)result->host_data;
   int num_errors = 0;
 
-  printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes);
+  printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels);
 
-  for (size_t i = 0; i < batch_dim; i++) {
+  for (unsigned int i = 0; i < num_labels; i++) {
 
-    uint32_t chosen = 0;
-    for (size_t id = 1; id < num_classes; ++id) {
-      if (data[i * num_classes + chosen] < data[i * num_classes + id])
-        chosen = id;
+    std::vector<ClassProb> elem_probs;
+    for (unsigned int id = 0; id < num_classes; ++id) {
+      ClassProb cProb;
+      cProb.prob = data[i * channels + id];
+      cProb.index = id;
+      elem_probs.push_back(cProb);
     }
 
-    if (chosen != labels[i])
-      num_errors++;
+    std::sort(elem_probs.begin(), elem_probs.end(),
+              descendFloatComp);
+    // Check if any of the top-5 predictions matches (fewer when < 5 classes)
+    bool matched = false;
+    for (size_t j = 0; j < 5 && j < elem_probs.size(); j++) {
+      ClassProb cProb = elem_probs[j];
+      if (cProb.index == labels[i])
+        matched = true;
+    }
+
+    if (!matched)
+      num_errors += 1;
   }
 
   float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
@@ -344,11 +443,38 @@ void dumpFinalAccuracy(float accuracy) {
   run_accuracies.push_back(accuracy);
 }
 
+// Overwrites the file "avg_psnr" with avg_psnr in fixed-point notation.
+void dumpAvgPSNR(float avg_psnr) {
+  FILE *fp = fopen("avg_psnr", "w+");
+  if (fp == NULL)
+    return; // nothing was opened; fclose(NULL) is undefined behaviour
+
+  std::ostringstream ss;
+  ss << std::fixed << avg_psnr;
+  std::string print_str = ss.str();
+  fwrite(print_str.c_str(), 1, print_str.length(), fp);
+  fclose(fp);
+}
+
+// Overwrites the file "psnr_std.txt" with psnr_std in fixed-point notation.
+void dumpPSNRStd(float psnr_std) {
+  FILE *fp = fopen("psnr_std.txt", "w+");
+  if (fp == NULL)
+    return; // nothing was opened; fclose(NULL) is undefined behaviour
+
+  std::ostringstream ss;
+  ss << std::fixed << psnr_std;
+  std::string print_str = ss.str();
+  fwrite(print_str.c_str(), 1, print_str.length(), fp);
+  fclose(fp);
+}
+
+
 void dumpExecutionAccuracies() {
 
   FILE *fp = fopen("run_accuracies.txt", "w+");
   if (fp != NULL) {
-    for (size_t i = 0; i < run_accuracies.size(); i++) {
+    for (unsigned int i = 0; i < run_accuracies.size(); i++) {
       float accuracy = run_accuracies[i];
       std::ostringstream ss;
       ss << std::fixed << accuracy;
@@ -360,4 +486,110 @@ void dumpExecutionAccuracies() {
 
   fclose(fp);
 }
+
+// Reads one float from file_name and returns it; aborts if the file cannot be opened.
+float readPSNRFromFile(const char *file_name) {
+  float psnr = 0.0f; // stays 0 when the file holds no parsable number
+  FILE *pFile = fopen(file_name, "r");
+  if (pFile == NULL) {
+    printf("ERROR: %s not found! \n", file_name);
+    abort();
+  }
+  fscanf(pFile, "%f", &psnr);
+  fclose(pFile); // release the handle once the value has been read
+  printf("**** PSNR read = %f \n\n", psnr);
+  return psnr;
+}
+
+float computePSNRViolation(void *gold_ptr, void *approx_ptr,
+                           float PSNR_threshold) {
+
+  PSNR_threshold = readPSNRFromFile("psnr.txt");
+  std::vector<float> psnr_list;
+
+  struct Tensor *gold_tensor = (struct Tensor *)gold_ptr;
+  struct Tensor *approx_tensor = (struct Tensor *)approx_ptr;
+
+  size_t *dim_sizes = gold_tensor->dims.dim_sizes;
+  size_t batch_dim = dim_sizes[0];
+  size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3];
+
+  printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size);
+
+  float *gold_data = (float *)gold_tensor->host_data;
+  float *approx_data = (float *)approx_tensor->host_data;
+
+  FILE *fp = fopen("img_psnr.txt", "w+");
+
+  float sum_psnr = 0.0;
+  int num_errors = 0;
+  for (size_t i = 0; i < batch_dim; i++) {
+    float mse_sum = 0.0;
+    float max_val = -999999;
+    size_t offset = i * image_size;
+
+    for (size_t j = 0; j < image_size; j++) {
+      float diff = gold_data[offset + j] - approx_data[offset + j];
+      float diff_square = diff * diff;
+      mse_sum += diff_square;
+
+      if (max_val < gold_data[offset + j]) {
+        max_val = gold_data[offset + j];
+      }
+    }
+
+    mse_sum = mse_sum / image_size;
+    float psnr = 20 * log10(255 / sqrt(mse_sum));
+
+    sum_psnr += psnr;
+    if (psnr < PSNR_threshold)
+      num_errors += 1;
+
+    printf("PSNR value = %f \n", psnr);
+    psnr_list.push_back(psnr);
+
+    std::ostringstream ss;
+    ss << std::fixed << psnr;
+    std::string print_str = ss.str();
+    fwrite(print_str.c_str(), 1, print_str.length(), fp);
+    fwrite("\n", 1, 1, fp);
+  }
+
+  float violation_rate = (num_errors * 1.0) / batch_dim * 100.0;
+  printf("*** violation_rate= %f \n\n", violation_rate);
+
+  float avg_psnr = sum_psnr / batch_dim;
+  printf("*** avg_psnr =  %f \n\n", avg_psnr);
+  dumpAvgPSNR(avg_psnr);
+
+  float success_rate = 100.0 - violation_rate;
+  dumpFinalAccuracy(success_rate);
+
+  fclose(fp);
+
+  float var = 0.0;
+  for (size_t i = 0; i < batch_dim; i++) {
+    var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr);
+  }
+
+  var /= batch_dim;
+  float std = sqrt(var);
+
+  dumpPSNRStd(std);
+
+  return violation_rate;
+}
+
+void dumpOutput(void *output_ptr, const char *file_name) {
+
+  struct Tensor *out_tensor = (struct Tensor *)output_ptr;
+  size_t size_in_bytes = out_tensor->size_in_bytes;
+  printf("** Output size = %lu \n", size_in_bytes);
+
+  float *host_data = (float *)out_tensor->host_data;
+  FILE *fd = fopen(file_name, "w+");
+  fwrite(host_data, 1, size_in_bytes, fd);
+  fclose(fd);
+}
+
 #endif
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
similarity index 99%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
rename to hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
index 746f62bce19b25c3b74bec4908cdc3c87bee034a..ffb4c3a809b3e936f6c27ebd7c11aef5c4460104 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/unit_tests.cc
+++ b/hpvm/projects/hpvm-tensor-rt/tests/unit_tests.cc
@@ -6,7 +6,7 @@
 #include <string.h>
 #include "tensor_runtime.h"
 #include "tensor_cpu_runtime.h"
-#include "utils.h"
+#include "tensorUtils.h"
 #include "tensor_custom_ops_cpu.h"
 
 using namespace std;
diff --git a/hpvm/test/CMakeLists.txt b/hpvm/test/CMakeLists.txt
index 660003538fe72d45e3dbfc1178fd296cdc7156b5..cb548b84dc9944e54c7dfdd0a0d74cf0aed9aafd 100644
--- a/hpvm/test/CMakeLists.txt
+++ b/hpvm/test/CMakeLists.txt
@@ -1,4 +1,5 @@
 include(../cmake/TestFile.cmake)  # Generation of `.test` files in CMake
 add_subdirectory(hpvm_pass)  # Passes test suite
-add_subdirectory(dnn_benchmarks/hpvm-c)  # DNN accuracy test suite
+add_subdirectory(dnn_benchmarks/hpvm-c)  # HPVM-C DNN accuracy test suite
+add_subdirectory(dnn_benchmarks/tensor-rt-src)  # tensor_runtime DNN (build only, no tests)
 add_subdirectory(dnn_benchmarks/profiling)  # hpvm-profiler test suite
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
index 3a0c6534e02ce82fdfd02f483f71c6be1a9ab433..6664827014d40c8a101f0aa30499228345edc460 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/CMakeLists.txt
@@ -1,50 +1,18 @@
 # First get approxhpvm.py which we then use to compile benchmarks.
 get_filename_component(APPROXHPVM_PY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py REALPATH)
 
-# Configure config.h which tells the benchmarks where's the model parameter directory.
-# We can also use the one in tensor_runtime, but we're avoiding that so as to 
-# decouple things.
+# Each source file contains a @MODEL_PARAMS_DIR@ waiting to be filled in.
 set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/")
-configure_file(
-  "include/config.h.in"
-  "${CMAKE_CURRENT_BINARY_DIR}/include/config.h"
-)
-# This will be an extra include directory (specific to these benchmarks)
-# and we'll give this to approxhpvm.py
-set(CONFIG_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
-
-# --[ llvm-lit test setup
-# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py)
-# as most of the tests require some kind of compilation / generation
-# which is best done over there.
-configure_lit_site_cfg(
-  ../../lit.site.cfg.py.in
-  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
-  MAIN_CONFIG
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
-)
-add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs"
-  ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS dnn_benchmarks  # Compile all dnn benchmarks to run them
-  ARGS "-j1"  # Run DNN benchmarks sequentially
-)
-# Install an accuracy comparator under build/bin
-set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR})
-add_custom_command(
-  OUTPUT ${BIN_DIR}/check_dnn_acc.py
-  COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR}
-  COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py
-  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py
-)
-
 set(test_compile_targets "")
 function(compile_hpvm_c bin_filename src_filepath codegen_target)
+  set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}.cpp")
+  configure_file("${src_filepath}" "${generated_file_path}" @ONLY)  # only @VAR@ is a placeholder; leave any ${...} in the C++ untouched
   add_custom_command(
     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}
-    DEPENDS ${src_filepath} approxhpvm.py
+    DEPENDS ${generated_file_path} approxhpvm.py
     COMMAND ${APPROXHPVM_PY}
-      ${src_filepath} ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}
-      -t ${codegen_target} -I ${CONFIG_INCLUDE_DIR} ${ARGV}
+      ${generated_file_path} ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename}
+      -t ${codegen_target} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${ARGN}
   )
   add_custom_target(${bin_filename} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bin_filename})
   set(test_compile_targets ${test_compile_targets} ${bin_filename} PARENT_SCOPE)
@@ -79,6 +47,31 @@ foreach(dir ${entries})
   hpvm_add_dnn_test(${dirname}_cudnn)
 endforeach(dir)
 
+# Install the accuracy comparator (check_dnn_acc.py) under build/bin for the test suite.
+set(BIN_DIR ${LLVM_BINARY_DIR}/${LLVM_TOOLS_INSTALL_DIR})
+add_custom_command(
+  OUTPUT ${BIN_DIR}/check_dnn_acc.py
+  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py ${BIN_DIR}/check_dnn_acc.py
+  COMMAND chmod +x ${BIN_DIR}/check_dnn_acc.py
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/check_dnn_acc.py
+)
+
 message(STATUS "List of HPVM-C DNN benchmarks: ${test_compile_targets}")
 add_custom_target(dnn_benchmarks DEPENDS ${test_compile_targets} ${BIN_DIR}/check_dnn_acc.py)
 message(STATUS "Target name for compiling all DNN benchmarks: dnn_benchmarks")
+
+# --[ llvm-lit test setup
+# lit.cfg.py looks for tests in CMAKE_CURRENT_BINARY_DIR (see lit.cfg.py)
+# as most of the tests require some kind of compilation / generation
+# which is best done over there.
+configure_lit_site_cfg(
+  ../../lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+add_lit_testsuite(check-hpvm-dnn "Running HPVM DNNs"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS dnn_benchmarks  # Compile all dnn benchmarks to run them
+  ARGS "-j1"  # Run DNN benchmarks sequentially
+)
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
index 860e3b6423bc78d073096a981f765bed10fb73a7..39f49784d76470c4e0bab213127369806e1e2531 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp
@@ -429,7 +429,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
index f44e19dece121cb01a1f3e6a8bf9e27ea945e6ce..dafd1a6ae084c4e1bf819ce1ac94e667c696eb24 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp
@@ -434,7 +434,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
index 6d8973ad982b1aa3b206a0cf40ee1888c37e293f..64350c590bb181fa4eaab4b2bf5fb37f69e11c09 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp
@@ -380,7 +380,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint8_t *labels = readLabels(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
index b2a940d501d8b1c2e29dbe7240012ace8197bbb4..72af2ff4a1b33aabac427d203101c32c4a7403c7 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp
@@ -386,7 +386,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
index 474ab64cadf3eac158d39e6e1e6686765c3bac36..37e7a34a51a14b6903d549f271d3c0c83822fec8 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp
@@ -478,7 +478,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
index 10e95202f2e2188a9dcd1c12a168a612f897fcf9..1206d7bac4b9dcff2b4cfd7183f4a3e5f65d73d9 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp
@@ -483,7 +483,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
index 5c42f6953cfd9256cea73b39868a7ec571f18565..d7ab4238ebac5598b92c432aced85a602bb5ce89 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp
@@ -282,7 +282,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
index 0c2568f81b701cb474a257b190be61b4bba45f3e..26acc65a99287ea9f20e037dd996635315d76e48 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp
@@ -287,7 +287,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
index 01d027341686291c83e605bdeee1bbcffa68d6e9..5f8c63dbfbfb800dc6f60f9ed9a6108dee0a9a48 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp
@@ -1984,7 +1984,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/";
+      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
index e51e85dd980dd910389ec4415174e6e005f75c41..2070089053ef0b6e7e0ca33c2c6cc4cea17b8e29 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp
@@ -1989,7 +1989,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/";
+      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
index fa83c534d0639241205758018f8f7c37401e6b22..5b580f26821e67cc96c8347e485b792f40105176 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp
@@ -1318,7 +1318,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
index c7b789c2343a8dfd1e847652af2bd1d6adfd51f1..735e2c9abab91f00560faa5496e234321027b82c 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp
@@ -1249,7 +1249,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   uint32_t *labels = readLabels3(labels_path.c_str(), 5000);
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
index 91d07e30469e675fd2027f29290e35a0db888174..160563064cc47effd463c4915b0c7f0d93bff56f 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp
@@ -5151,7 +5151,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/";
+      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
index 932580e03e7ccc4495d8d76be2f7147369e36d68..c5cf2cb3a0177a5cce9ad0cf460484e63ded0ecd 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp
@@ -4927,7 +4927,7 @@ int main(int argc, char *argv[]) {
   }
 
   std::string dir_prefix =
-      std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/";
+      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
index 195c676c11d53b19e0d18ed4908198a929d188aa..bec6139c2d089e90d09fa239e1b15c9a835fd4ea 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp
@@ -845,7 +845,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
index c304237ea57ba15d48cff0773860cdc469fc2a04..4fa7d5c121bacff122821fe983ed443e3c6db249 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp
@@ -850,7 +850,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
index 4cd5c134293d85983146352175e278915ab1d2ba..8666030fba4390d29d9324f5a5c7d60324325f05 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp
@@ -845,7 +845,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
index 532fca6b856f296624c21e9a18421763c4b70f48..6d01caa3b7c0875cff4f3e16131ddd09195e92b7 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp
@@ -850,7 +850,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
index 8e299f40e6ddd04a3ce9f8d9dffff49b1de36189..b1b2b4f2e312b6372e10a2fce3ef12eab2dddded 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp
@@ -893,7 +893,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
index 930a33e43c706e6e91475fc97671c39c23f63387..eb29e45805671072428318412f27b05d0da90199 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp
@@ -898,7 +898,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-  std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/";
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in b/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in
deleted file mode 100644
index 0eb8f3f24d0e51f2aaa12f1fd672043599490082..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/hpvm-c/include/config.h.in
+++ /dev/null
@@ -1 +0,0 @@
-#define MODEL_PARAMS_DIR "@MODEL_PARAMS_DIR@"
diff --git a/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e22eba67471855971005bf9e57ed0aa38dafff8
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/CMakeLists.txt
@@ -0,0 +1,55 @@
+#**************** FP32 and FP16 TensorRT Source Builds ***********
+# Don't put binaries in build/bin. This doesn't affect global setting.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
+# Substituted for @MODEL_PARAMS_DIR@ when the benchmark sources are configured.
+set(MODEL_PARAMS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../model_params/")
+set(test_compile_targets "")
+
+# add_trt_source(<target_name> <filepath>)
+#   Copies <filepath> to <target_name>.cpp in the current binary directory,
+#   replacing @VAR@ placeholders (e.g. @MODEL_PARAMS_DIR@), builds it as an
+#   executable linked against tensor_runtime_online, and appends <target_name>
+#   to test_compile_targets in the caller's scope.
+function(add_trt_source target_name filepath)
+  set(generated_file_path "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.cpp")
+  # @ONLY: leave any literal ${...} text in the C++ source untouched.
+  configure_file("${filepath}" "${generated_file_path}" @ONLY)
+  add_executable(${target_name} "${generated_file_path}")
+  target_link_libraries(${target_name} PRIVATE tensor_runtime_online)
+  set(test_compile_targets ${test_compile_targets} ${target_name} PARENT_SCOPE)
+endfunction()
+
+# Benchmarks to build; a variant is skipped if its source file is absent.
+# NOTE(review): fp32/lenet_mnist.cc and fp16/lenet_mnist_half.cc also live in
+# this directory but lenet_mnist is not listed here -- confirm this is intended.
+set(
+  DNN_NAMES
+  alexnet_cifar10
+  alexnet2_cifar10
+  vgg16_cifar10
+  resnet18_cifar10
+  vgg16_cifar100
+  mobilenet_cifar10
+  alexnet_imagenet
+  vgg16_imagenet
+  resnet50_imagenet
+)
+foreach(dnn_name ${DNN_NAMES})
+  # FP32 version
+  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc")
+    add_trt_source(${dnn_name}_fp32 "${CMAKE_CURRENT_SOURCE_DIR}/fp32/${dnn_name}.cc")
+  endif()
+  # FP16 version
+  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc")
+    add_trt_source(${dnn_name}_fp16 "${CMAKE_CURRENT_SOURCE_DIR}/fp16/${dnn_name}_half.cc")
+  endif()
+endforeach()
+
+message(STATUS "List of tensor_runtime DNN benchmarks: ${test_compile_targets}")
+# Aggregate target; dependencies on other targets go through add_dependencies().
+add_custom_target(trt_dnn_benchmarks)
+if(test_compile_targets)
+  add_dependencies(trt_dnn_benchmarks ${test_compile_targets})
+endif()
+message(STATUS "Target name for compiling all DNN benchmarks: trt_dnn_benchmarks")
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
similarity index 96%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
index 0b344035296bdbab2744e32604f3a8881feb6230..ab80718fd33d0b9787be4a0f183e3a7a65dc76e7 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet2_cifar10_half.cc
@@ -1,7 +1,8 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 /* NOTE: Reference Architecture to use for profiling */
 void testCifarNet() {
@@ -9,8 +10,8 @@ void testCifarNet() {
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
 
   std::string dir_prefix =
-      model_params_path + std::string("/alexnet2_cifar10/");
+      std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
similarity index 95%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
index 7d493b8720ab701f87fdd53b315da7eafecf6637..b3b69d6b695eca9286b90685f3e071e234887d27 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/alexnet_cifar10_half.cc
@@ -1,13 +1,14 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
similarity index 97%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
index 03dc905bbfcb07ad9a266fc153cd1a6a0db9837e..44b78b9169707fd6c7b9ff6503a4a9aa8d2ec947 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/lenet_mnist_half.cc
@@ -1,7 +1,8 @@
 
 
 #include "tensor_runtime.h"
-#include "utils.h"
+#include "tensorUtils.h"
+
 
 /* NOTE: Reference Architecture to use for profiling */
 void testLenetTanh() {
@@ -11,7 +12,7 @@ void testLenetTanh() {
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = model_params_path + std::string("/lenet_mnist/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
similarity index 99%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
index d6eaef755743ce961d3d9c2f013eef26a77579f7..d4423bf4345756e72ad46b140ae8cafc26eae264 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/mobilenet_cifar10_half.cc
@@ -1,15 +1,15 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/mobilenet_cifar10/");
-
+      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
index 40e128eb8a80f6e080c090589a3e91b80ffa082f..76dea5ef08713d22fe7086b678bb3274378d0fd9 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/resnet18_cifar10_half.cc
@@ -1,15 +1,15 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/resnet18_cifar10/");
-
+      std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
 
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
index eb3275b83009ec4300e9cb713f3b182727661db4..2772fd3da42d50aa2ff5391d1e3c85c610a4960a 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar100_half.cc
@@ -1,13 +1,14 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
index 2a4b5a6a914698f621284e0f5b19843b817068df..954f6778b899d2cefb2b28d68a32fad33d52f70c 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp16/vgg16_cifar10_half.cc
@@ -1,13 +1,14 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
similarity index 96%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
index 396e9f11cae92c2f6613b5acb799caecbf025a59..e7431234d705449efa0fc5aafe23238e89be1d30 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet2_cifar10.cc
@@ -1,7 +1,8 @@
 
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
 
 /* NOTE: Reference Architecture to use for profiling */
 void testCifarNet() {
@@ -9,8 +10,8 @@ void testCifarNet() {
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
 
   std::string dir_prefix =
-      model_params_path + std::string("/alexnet2_cifar10/");
+      std::string("@MODEL_PARAMS_DIR@") + "/alexnet2_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
similarity index 96%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
index 600512078563baf850f440ea97e78cb2d73be170..12c304c9b401c586a0da4658b092f2b791268983 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_cifar10.cc
@@ -1,13 +1,14 @@
 
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/alexnet_cifar10/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   // std::string labels_path = dir_prefix + std::string("labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
index 29909e5938ca0f700c4ee22165ae2ad354e53a32..b57e60c0fef41b283ad57a7b203759a8f014252d 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/alexnet_imagenet.cc
@@ -1,15 +1,15 @@
 
 
 #include "tensor_runtime.h"
-#include "utils.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/alexnet_imagenet/");
-
+      std::string("@MODEL_PARAMS_DIR@") + "/alexnet_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
similarity index 97%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
index 61a0eeb441458ff6f91af8bc76ecc17a33428aec..9777670722b69c8b23a82a77312d17386f2d5c3f 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/lenet_mnist.cc
@@ -1,7 +1,8 @@
 
 
 #include "tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensorUtils.h"
+
 
 int total_runs = 1;
 
@@ -10,7 +11,7 @@ void testLenetTanh() {
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = model_params_path + std::string("/lenet_mnist/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/lenet_mnist/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
similarity index 99%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
index 85849126cf164693d12fb08aba8326033ca61b82..3e37bf7feb6641af3afdeb8fb9f3a65fdfcbdce3 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/mobilenet_cifar10.cc
@@ -1,15 +1,15 @@
 
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/mobilenet_cifar10/");
+      std::string("@MODEL_PARAMS_DIR@") + "/mobilenet_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
   void *conv2d_1_w =
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
index bd3dd7dc1ea23f3cb8ad91e8632b347dd51a848b..c8a99419a81d19b374642c21c977a511413f9ae2 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet18_cifar10.cc
@@ -1,15 +1,15 @@
 
 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/resnet18_cifar10/");
-
+      std::string("@MODEL_PARAMS_DIR@") + "/resnet18_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
 
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
similarity index 99%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
index 0cccb124b0dca81d45887df50c4a9bcaf2a21db5..3aeabc22736e6955a9ad5ad07144fc38057616ea 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/resnet50_imagenet.cc
@@ -1,15 +1,15 @@
 
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
   std::string dir_prefix =
-      model_params_path + std::string("/resnet50_imagenet/");
-
+      std::string("@MODEL_PARAMS_DIR@") + "/resnet50_imagenet/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
index 813874f0ed888ed5889b0574de454ca2720c944d..f7fffadfc36ba0fd248371efb35a1b7dfede68d3 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar10.cc
@@ -1,13 +1,13 @@
 
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar10/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
   std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
index 84164bf24bbff855b7a0975d7df2883e674b34c8..d3949c7cc568063f3b344d8497551fa1f4f4102c 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_cifar100.cc
@@ -1,13 +1,14 @@
 
 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
+#include "tensor_runtime.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_cifar100/";
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
similarity index 98%
rename from hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc
rename to hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
index eca833b08ff374e208f0cafabbf598cd0f7b5d90..2bb1be2821a8d33062bf1cfd83bb978f59884fa9 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_imagenet.cc
+++ b/hpvm/test/dnn_benchmarks/tensor-rt-src/fp32/vgg16_imagenet.cc
@@ -1,13 +1,14 @@
 
 
 #include "tensor_runtime.h"
-#include "utils.h"
+#include "tensorUtils.h"
+
 
 int main() {
 
   llvm_hpvm_initTensorRt(0);
 
-  std::string dir_prefix = model_params_path + std::string("/vgg16_imagenet/");
+  std::string dir_prefix = std::string("@MODEL_PARAMS_DIR@") + "/vgg16_imagenet/";
 
   std::string input_path = dir_prefix + std::string("test_input.bin");
   std::string labels_path = dir_prefix + std::string("test_labels.bin");
diff --git a/hpvm/tools/py-approxhpvm/CMakeLists.txt b/hpvm/tools/py-approxhpvm/CMakeLists.txt
index f93f96dd0578a67630cc859bba2e24e071b39299..f9d9d6ec60825fc1e858a28e0598189fd6b1cda5 100644
--- a/hpvm/tools/py-approxhpvm/CMakeLists.txt
+++ b/hpvm/tools/py-approxhpvm/CMakeLists.txt
@@ -1,15 +1,27 @@
 # This file is very tightly coupled with main.py.in.
 # Watch out and keep them in sync.
+# main.py.in (to become approxhpvm.py) requires the following variables:
+# LLVM_PROJECT_DIR, LLVM_BUILD_DIR
+# TRT_PATH, TRT_INCLUDE_DIRS, TRT_LINK_DIRS, TRT_LINK_LIBS
+# DIRECT_LINK_LIBS
+# AVAILABLE_PASSES, HPVM_RT_PATH
 
 set(LLVM_PROJECT_DIR ${CMAKE_SOURCE_DIR})
 set(LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
-set(LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+
+get_target_property(TRT_INCLUDE_DIRS tensor_runtime INCLUDE_DIRECTORIES)
+get_target_property(TRT_LINK_DIRS tensor_runtime TRT_LINK_DIRS)
+get_target_property(TRT_LINK_LIBS tensor_runtime TRT_LINK_LIBS)
+
+# OpenCL_LIBRARY is set by find_package(OpenCL) in hpvm/CMakeLists.txt. We link
+# to it directly because calls to OpenCL functions are injected by HPVM passes.
+set(DIRECT_LINK_LIBS ${OpenCL_LIBRARY} "$<TARGET_FILE:tensor_runtime>")
+
 # The hpvm-rt runtime
 # This has to be explicitly set as hpvm-rt.bc is created in a custom_target
 # and does not export its file location.
 # Keep this in sync with hpvm/projects/hpvm-rt/CMakeLists.txt.
 set(HPVM_RT_PATH ${LLVM_BUILD_DIR}/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc)
-set(TENSOR_RUNTIME_LIB "$<TARGET_FILE:tensor_runtime>")
 set(
     AVAILABLE_PASSES
     LLVMBuildDFG
@@ -21,8 +33,7 @@ set(
     LLVMClearDFG
     LLVMGenHPVM
 )
-# CUDA_TOOLKIT_ROOT_DIR and CUDNN_LIBRARY_PATH has been defined globally
-set(CUDNN_DIR ${CUDNN_LIBRARY_PATH})
+
 # First resolve all `@symbol@` by configuring the file
 configure_file(main.py.in ${CMAKE_CURRENT_BINARY_DIR}/main.py.conf)
 # Then resolve all generator expressions we configured into the previous file
@@ -32,17 +43,7 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main.py INPUT ${CMAKE_CURRENT_B
 # so we can set these dependencies on it
 set(
     DEPS
-    tensor_runtime
-    LLVMBuildDFG
-    LLVMInPlaceDFGAnalysis
-    LLVMDFG2LLVM_CPU
-    LLVMDFG2LLVM_CUDNN
-    LLVMDFG2LLVM_WrapperAPI
-    LLVMFuseHPVMTensorNodes
-    LLVMClearDFG
-    LLVMGenHPVM
-    hpvm-rt.bc
-    clang opt llvm-link
+    tensor_runtime hpvm-rt.bc clang opt llvm-link ${AVAILABLE_PASSES}
 )
 add_custom_command(
     OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/approxhpvm.py
diff --git a/hpvm/tools/py-approxhpvm/main.py.in b/hpvm/tools/py-approxhpvm/main.py.in
index af706a1eaa7a53879e525d87dd5034caf814db38..7b211911643c64d8bf2c34ef8a43e3ac98cdd88a 100644
--- a/hpvm/tools/py-approxhpvm/main.py.in
+++ b/hpvm/tools/py-approxhpvm/main.py.in
@@ -7,25 +7,17 @@ from typing import List, Union, Optional
 PathLike = Union[Path, str]
 
 HPVM_PROJECT_DIR = Path("@LLVM_PROJECT_DIR@") / "tools/hpvm"
-LLVM_BUILD_DIR = Path("@LLVM_BUILD_DIR@") / "bin"
-CUDA_TOOLKIT_ROOT_DIR = Path("@CUDA_TOOLKIT_ROOT_DIR@")
-TENSOR_RUNTIME_LIB = Path("@TENSOR_RUNTIME_LIB@")
+LLVM_BUILD_BIN = Path("@LLVM_BUILD_DIR@") / "bin"
+
+# Include directories, link directories and link libraries filled in by CMake
+TRT_INCLUDE_DIRS = "@TRT_INCLUDE_DIRS@".split(";")
+TRT_LINK_DIRS = [Path(s) for s in "@TRT_LINK_DIRS@".split(";")]
+TRT_LINK_LIBS = "@TRT_LINK_LIBS@".split(";")
+DIRECT_LINK_LIBS = "@DIRECT_LINK_LIBS@".split(";")
+
 AVAILABLE_PASSES = "@AVAILABLE_PASSES@".split(";")
 HPVM_RT_PATH = "@HPVM_RT_PATH@"
-CUDNN_DIR = "@CUDNN_DIR@"
 
-# Directories to include
-INCLUDE_DIRS = [
-    HPVM_PROJECT_DIR / "include",  # HPVM include dir
-    # Tensor runtime include dir
-    HPVM_PROJECT_DIR / "projects/hpvm-tensor-rt/tensor_runtime/include",
-    HPVM_PROJECT_DIR / "test/dnn_benchmarks/hpvm-c/include",  # hpvm-c intrinsics decl dir
-    CUDA_TOOLKIT_ROOT_DIR / "include",  # CUDA include dir
-]
-LINK_DIRS = [CUDA_TOOLKIT_ROOT_DIR / "lib64", CUDNN_DIR, TENSOR_RUNTIME_LIB.parent]
-LINK_LIBS = [
-    "pthread", "cudart", "curand", "cudnn", "cublas", "cufft", "OpenCL", "stdc++fs", "omp", "m"
-]
 COMPILE_FLAGS = ["fno-exceptions", "std=c++11", "O3"]
 
 
@@ -72,10 +64,10 @@ def hpvm_c_to_ll(
     flags: List[str] = None,
 ) -> List[str]:
     extra_includes = extra_includes or []
-    includes = [f"-I{path}" for path in INCLUDE_DIRS + extra_includes]
+    includes = [f"-I{path}" for path in TRT_INCLUDE_DIRS + extra_includes]
     flags = [f"-{flg}" for flg in (flags or []) + COMPILE_FLAGS]
     return [
-        str(LLVM_BUILD_DIR / "clang++"), *includes, *flags, "-emit-llvm", "-S",
+        str(LLVM_BUILD_BIN / "clang++"), *includes, *flags, "-emit-llvm", "-S",
         str(src_file), "-o", str(target_file)
     ]
 
@@ -115,17 +107,31 @@ def opt_codegen_tensor(
 
 
 def link_hpvm_rt(src_file: PathLike, target_file: PathLike) -> List[str]:
-    return [str(LLVM_BUILD_DIR / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
+    return [str(LLVM_BUILD_BIN / "llvm-link"), str(src_file), HPVM_RT_PATH, "-o", str(target_file)]
 
 
 def link_binary(src_file: PathLike, target_file: PathLike) -> List[str]:
+    def drop_suffix(libname: str):
+        import re
+
+        match = re.match(r"lib(.*)\.so", libname)
+        return libname if match is None else match.group(1)
+
+    link_dirs, link_libnames = [], []
+    for lib in DIRECT_LINK_LIBS:
+        lib = Path(lib)
+        link_dirs.append(lib.parent)
+        link_libnames.append(drop_suffix(lib.name))
+    link_dirs += TRT_LINK_DIRS
+    link_libnames += TRT_LINK_LIBS
+
     linker_dir_flags = []
-    for path in LINK_DIRS:
+    for path in link_dirs:
         linker_dir_flags.extend([f"-L{path}", f"-Wl,-rpath={path}"])
-    linker_lib_flags = [f"-l{lib}" for lib in LINK_LIBS]
+    linker_lib_flags = [f"-l{drop_suffix(lib)}" for lib in link_libnames]
     return [
-        str(LLVM_BUILD_DIR / "clang++"), str(src_file), str(TENSOR_RUNTIME_LIB), "-o", str(target_file),
-        *linker_dir_flags, *linker_lib_flags
+        str(LLVM_BUILD_BIN / "clang++"), str(src_file),
+        "-o", str(target_file), *linker_dir_flags, *linker_lib_flags
     ]
 
 
@@ -141,7 +147,7 @@ def _run_opt(
     load_passes_strs = [s for pass_ in pass_names for s in ["-load", f"{pass_}.so"]]
     pass_flags_strs = [f"-{flag}" for flag in pass_flags]
     return [
-        str(LLVM_BUILD_DIR / "opt"), *load_passes_strs, *pass_flags_strs,
+        str(LLVM_BUILD_BIN / "opt"), *load_passes_strs, *pass_flags_strs,
         "-S", str(src_file), "-o", str(target_file)
     ]
 
@@ -154,6 +160,7 @@ def parse_args():
         "-t",
         "--codegen-target",
         type=str,
+        required=True,
         choices=["tensor", "cudnn"],
         help="Backend to use",
     )
@@ -165,7 +172,7 @@ def parse_args():
         help="File to approximation configurations; required for 'tensor' target"
     )
     parser.add_argument(
-        "-I", "--include", type=Path, nargs="+",
+        "-I", "--include", type=Path, action="append",
         help="Additional include directories to use"
     )