diff --git a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
index 0b66eb695097f368b2513055e35662a6a8b95fa1..d28868892f6d45e6905594e143a13aa83b1db9d6 100644
--- a/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/hpvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -209,6 +209,12 @@ target_link_libraries(vgg16_cifar100_fp32  tensor_runtime_online ${GPU_PROFILER_
 add_executable(mobilenet_cifar10_fp32  dnn_sources/src/fp32/mobilenet.cc)
 target_link_libraries(mobilenet_cifar10_fp32  tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
+add_executable(alexnet_imagenet_fp32  dnn_sources/src/fp32/alexnet_imagenet.cc)  # one single-source executable per network, like the targets around it
+target_link_libraries(alexnet_imagenet_fp32  tensor_runtime_online  ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})  # same three libraries as the mobilenet/resnet50 fp32 targets
+
+add_executable(vgg16_imagenet_fp32  dnn_sources/src/fp32/vgg16_imagenet.cc)  # one single-source executable per network, like the targets around it
+target_link_libraries(vgg16_imagenet_fp32  tensor_runtime_online  ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})  # same three libraries as the mobilenet/resnet50 fp32 targets
+
 add_executable(resnet50_imagenet_fp32  dnn_sources/src/fp32/resnet50_imagenet.cc)
 target_link_libraries(resnet50_imagenet_fp32  tensor_runtime_online  ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h b/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
index 500ff63bc86dce6cae0dee3f942639c07bf14ab3..5d1e0e66ad1a3402981682ed97e664ddcc173787 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/include/utils.h
@@ -13,9 +13,14 @@
 #include <tensor_runtime.h>
 #include <tensor.h>
 #include <cmath>
+#include <string>
+#include <string.h>
 
 
 std::vector<float> run_accuracies;
+// Base directory for the model parameter files; a relative path that already ends in '/'.
+// NOTE(review): defined (not just declared) in a header, like run_accuracies above - only safe while each program includes utils.h from a single translation unit; confirm.
+std::string model_params_path = "../../../build/model_params/";
 
 
 void printTensorInfo(void* tensor_ptr){
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc
index 161cdd249cc1e94f0a739772e0b9b9ea86993be8..d93110945b1d1a70ec29c7788d9133dc16551ee5 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet2_cifar10_half.cc
@@ -18,7 +18,7 @@ void testCifarNet(){
 
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
  
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/alexnet2_cifar10/"); // base dir is model_params_path (dnn_sources/include/utils.h); the leading '/' keeps the join valid with or without a trailing '/' on the base
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin");
   std::string labels32_path =  dir_prefix + std::string("labels32.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc
index 8a429862f34f95793dd9ca7caa619b10dbe568ab..b7695bbd7a24712e335f0cf8bbd25290f3261dea 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/alexnet_cifar10_half.cc
@@ -14,7 +14,7 @@ int main(){
   llvm_hpvm_initTensorRt(0); 
 
 
-  std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); // base dir is model_params_path (dnn_sources/include/utils.h); the leading '/' keeps the join valid with or without a trailing '/' on the base
 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc
index f04ec041644394e2258414575162b961f9849667..29f392c630a36a6044c5f804e5d3a7b252591831 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/lenet_mnist_half.cc
@@ -21,7 +21,7 @@ void testLenetTanh(){
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = std::string("../model_params/lenet_mnist/");   
+  std::string dir_prefix = model_params_path + std::string("/lenet_mnist/");   // base dir is model_params_path (dnn_sources/include/utils.h); the leading '/' keeps the join valid with or without a trailing '/' on the base
 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc
index dabafd4345f29d00c7271c796a8497aba8b7772d..d662dc1584c7810d8d3631d5ac16c427c3ff8b02 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_half.cc
@@ -14,7 +14,7 @@ int main(){
     llvm_hpvm_initTensorRt(0); 
 
 
-    std::string dir_prefix = std::string("../model_params/mobilenet/"); 
+    std::string dir_prefix = model_params_path + std::string("/mobilenet/"); // base dir is model_params_path (dnn_sources/include/utils.h); the leading '/' keeps the join valid with or without a trailing '/' on the base
     std::string input_path =  dir_prefix + std::string("input.bin"); 
     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_shallow_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_shallow_half.cc
deleted file mode 100644
index 7ce9a90e10697c979adc470345244a2cc326f0cb..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/mobilenet_shallow_half.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-int main(int argc, char* argv[]){ 
-
-    llvm_hpvm_initTensorRt(0); 
-
-
-    std::string dir_prefix = std::string("../model_params/mobilenet_shallow/");
-
-    std::string input_path =  dir_prefix + std::string("input.bin"); 
-    std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-    std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-    void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-    std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-    void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-    void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-    void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-    void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-    std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-    void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-    std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-    void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-    void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-    void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-    std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-    void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-    std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-    void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-    std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-    void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-    void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-    void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-    void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-    std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-    void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-    std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-    void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-    void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-    void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-    std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-    void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-    std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-    void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-    std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-    void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-    void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-    void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-    void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-    std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-    void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-    std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-    void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-    void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-    void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-    void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-    std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-    void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-    std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-    void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-    void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-    void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-    void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-    std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-    void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-    std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-    void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-    void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-    void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-    std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-    void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-    std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-    void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-    std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-    void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-    void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-    void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-    void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-    std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-    void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-    std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-    void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-    void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-    void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-    void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-    std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-    void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-    std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-    void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-    void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-    void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-    void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-    std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-    void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-    std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-    void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-    void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-    void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-    std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-    void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-    std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-    void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-    std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-    void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-    std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-    void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-    std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-    void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-    std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-    void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-    std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-    void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-    std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-    void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-    startMemTracking(); 
-
-    int test_input_size = 2000; 
-    int batch_size = 1000; 
-    int batch_count = test_input_size / batch_size; 
-
-
-    float final_accuracy = 0.0;
-
-    for(int i = 0; i < batch_count; i++){ 
-
-        int start = i * batch_size; 
-        int end = (i + 1) * batch_size; 
-
-        void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-        void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-        void* var_2 = tensorHalfRelu(var_1); 
-        void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-        void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-        void* var_6 = tensorHalfRelu(var_5); 
-        void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-        void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-        void* var_9 = tensorHalfRelu(var_8); 
-        void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-        void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-        void* var_13 = tensorHalfRelu(var_12); 
-        void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-        void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-        void* var_16 = tensorHalfRelu(var_15); 
-        void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-        void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-        void* var_20 = tensorHalfRelu(var_19); 
-        void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-        void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-        void* var_23 = tensorHalfRelu(var_22); 
-        void* var_26 = tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-        void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-        void* var_28 = tensorHalfRelu(var_27); 
-        void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-        void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-        void* var_31 = tensorHalfRelu(var_30); 
-        void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-        void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-        void* var_35 = tensorHalfRelu(var_34); 
-        void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-        void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-        void* var_38 = tensorHalfRelu(var_37); 
-        void* var_41 = tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-        void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-        void* var_43 = tensorHalfRelu(var_42); 
-        void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-        void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-        void* var_46 = tensorHalfRelu(var_45); 
-        void* var_47 = tensorHalfPooling(var_46,1,2,2,0,0,2,2); 
-        void* var_49 = tensorHalfGemmGPU(var_47, dense_1_w); 
-        void* var_50 = tensorHalfAdd(var_49, dense_1_b); 
-        void* var_51 = tensorSoftmax(var_50); 
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-        float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-        final_accuracy += accuracy; 
-        freeBatchMemory(); 
-
-    }
-
-    final_accuracy = final_accuracy / batch_count;
-    dumpFinalAccuracy(final_accuracy);
-
-    dumpExecutionAccuracies();
-
-    llvm_hpvm_cleanupTensorRt(); 
-
-    return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet2_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet2_cifar10_half_profiling.cc
deleted file mode 100644
index 82fe03247f36dbe6de31205a60344b7f44f85bad..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet2_cifar10_half_profiling.cc
+++ /dev/null
@@ -1,169 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  for (int i = 0; i < total_runs; i++){  
-	  for(int i = 0; i < batch_count; i++){
-
-		int start = i * batch_size;
-		int end = (i + 1) * batch_size;
-		void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-      	profiler.resume_profiler();
-		
-		void* conv1out = tensorHalfConvolution(input, conv1_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv1out, conv1_bias); 
-		void* conv1_tanh = tensorHalfTanh(conv1out);
-		
-		// 2nd Layer
-		void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv2out, conv2_bias); 
-		void* conv2_tanh = tensorHalfTanh(conv2out);
-		void* pool2out = tensorHalfPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-		 
-		// 3rd Layer
-		void* conv3out = tensorHalfConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv3out, conv3_bias); 
-		void* conv3_tanh = tensorHalfTanh(conv3out);
-
-		// 4th Layer
-		void* conv4out = tensorHalfConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv4out, conv4_bias); 
-		void* conv4_tanh = tensorHalfTanh(conv4out);
-		void* pool4out = tensorHalfPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-		
-		// 5th Layer
-		void* conv5out = tensorHalfConvolution(pool4out, conv5_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv5out, conv5_bias); 
-		void* conv5_tanh = tensorHalfTanh(conv5out);
-
-		// 6th Layer
-		void* conv6out = tensorHalfConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-						   conv_mode, conv_precision);
-		tensorHalfAdd(conv6out, conv6_bias); 
-	  
-		void* conv6_tanh = tensorHalfTanh(conv6out);
-		void* pool6out = tensorHalfPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-		
-		// final FC Layer
-		void* gemm1out = tensorHalfGemmGPU(pool6out, fc1_weights);  
-		void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias);
-		void* result = tensorSoftmax(gemm1biasout);
-
-		profiler.pause_profiler();
-		auto time_energy = profiler.get_time_energy();
-		total_time += time_energy.first;
-		total_energy += time_energy.second;
-
-        profiler.reset();
-
-		uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-		float accuracy = computeAccuracy2(labels, batch_size, result); 
-		final_accuracy += accuracy;
-		
-    	freeBatchMemory();
-    }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet_cifar10_half_profiling.cc
deleted file mode 100644
index 965e3170ea5c9df7dec1abe13d06581fe56f3b21..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/alexnet_cifar10_half_profiling.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv0.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv_bias0.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv3.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv_bias3.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv6.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv_bias6.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv7.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv_bias7.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv8.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv_bias8.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("fc12.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("fc_bias12.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  for (int i = 0; i < total_runs; i++){  
-      for(int i = 0; i < batch_count; i++){
-
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);    
-
-        profiler.resume_profiler();
-        void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); 
-        void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); 
-        void* var_2 = tensorHalfTanh(var_1); 
-        void* var_3 = tensorHalfPooling(var_2,0,2,2,0,0,2,2); 
-        void* var_5 = tensorHalfConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); 
-        void* var_6 = tensorHalfAdd(var_5, conv2d_2_b); 
-        void* var_7 = tensorHalfTanh(var_6); 
-        void* var_8 = tensorHalfPooling(var_7,0,2,2,0,0,2,2); 
-        void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); 
-        void* var_12 = tensorHalfTanh(var_11); 
-        void* var_13 = tensorHalfConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_14 = tensorHalfAdd(var_13, conv2d_4_b); 
-        void* var_15 = tensorHalfTanh(var_14); 
-        void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); 
-        void* var_18 = tensorHalfTanh(var_17); 
-        void* var_19 = tensorHalfPooling(var_18,0,2,2,0,0,2,2); 
-        void* var_22 = tensorHalfGemmGPU(var_19, dense_1_w); 
-        void* var_23 = tensorHalfAdd(var_22, dense_1_b); 
-        void* var_24 = tensorSoftmax(var_23); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_24); 
-        final_accuracy += accuracy;
-        
-        freeBatchMemory();
-      }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/lenet_keras_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/lenet_keras_half_profiling.cc
deleted file mode 100644
index e6ffd6b03de4901780511e56afdb5faac85bb807..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/lenet_keras_half_profiling.cc
+++ /dev/null
@@ -1,186 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 10;
-
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
- 
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  float final_accuracy = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    profiler.resume_profiler();
-    startProfiling();
-
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-
-    // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-    tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv1_tanh = tensorHalfTanh(pool1out);
-
-    // NOTE: input channels have to match between tensor op inputs and outputs 
-    void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv2_tanh = tensorHalfTanh(pool2out);
-
-    void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights);  
-
-    void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorHalfTanh(gemm1biasout);
-  
-    void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorHalfTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    profiler.pause_profiler();
-    auto time_energy = profiler.get_time_energy();
-    total_time += time_energy.first;
-    profiler.reset();
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    float accuracy = computeAccuracy2(labels, test_batch_size, result);
-    final_accuracy += accuracy;
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_depthwise_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_depthwise_half_profiling.cc
deleted file mode 100644
index 641047b50dc1219f1d02bbfb75e2014840c90d96..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_depthwise_half_profiling.cc
+++ /dev/null
@@ -1,416 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 1000;  
-  int batch_count = test_input_size / batch_size; 
-
-  int total_runs = 10;
-  float final_accuracy = 0.0; 
-
-  for (int run_num = 0; run_num < total_runs; run_num++){
-      for(int i = 0; i < batch_count; i++){ 
-
-        int start = i * batch_size; 
-        int end = (i + 1) * batch_size; 
-
-        void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-        void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-        void* var_2 = tensorHalfRelu(var_1); 
-        void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-        void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-        void* var_6 = tensorHalfRelu(var_5); 
-        void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-        void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-        void* var_9 = tensorHalfRelu(var_8); 
-        void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-        void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-        void* var_13 = tensorHalfRelu(var_12); 
-        void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-        void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-        void* var_16 = tensorHalfRelu(var_15); 
-        void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-        void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-        void* var_20 = tensorHalfRelu(var_19); 
-        void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-        void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-        void* var_23 = tensorHalfRelu(var_22); 
-        void* var_26 = tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-        void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-        void* var_28 = tensorHalfRelu(var_27); 
-        void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-        void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-        void* var_31 = tensorHalfRelu(var_30); 
-        void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-        void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-        void* var_35 = tensorHalfRelu(var_34); 
-        void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-        void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-        void* var_38 = tensorHalfRelu(var_37); 
-        void* var_41 = tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-        void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-        void* var_43 = tensorHalfRelu(var_42); 
-        void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-        void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-        void* var_46 = tensorHalfRelu(var_45); 
-        void* var_48 = tensorHalfConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-        void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-        void* var_50 = tensorHalfRelu(var_49); 
-        void* var_51 = tensorHalfConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-        void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-        void* var_53 = tensorHalfRelu(var_52); 
-        void* var_55 = tensorHalfConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-        void* var_56 = tensorHalfBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-        void* var_57 = tensorHalfRelu(var_56); 
-        void* var_58 = tensorHalfConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-        void* var_59 = tensorHalfBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-        void* var_60 = tensorHalfRelu(var_59); 
-        void* var_63 = tensorHalfConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-        void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-        void* var_65 = tensorHalfRelu(var_64); 
-        void* var_66 = tensorHalfConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-        void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-        void* var_68 = tensorHalfRelu(var_67); 
-        void* var_70 = tensorHalfConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-        void* var_71 = tensorHalfBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-        void* var_72 = tensorHalfRelu(var_71); 
-        void* var_73 = tensorHalfConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-        void* var_74 = tensorHalfBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-        void* var_75 = tensorHalfRelu(var_74); 
-        void* var_77 = tensorHalfConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-        void* var_78 = tensorHalfBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-        void* var_79 = tensorHalfRelu(var_78); 
-        void* var_80 = tensorHalfConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-        void* var_81 = tensorHalfBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-        void* var_82 = tensorHalfRelu(var_81); 
-        void* var_85 = tensorHalfConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-        void* var_86 = tensorHalfBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-        void* var_87 = tensorHalfRelu(var_86); 
-        void* var_88 = tensorHalfConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-        void* var_89 = tensorHalfBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-        void* var_90 = tensorHalfRelu(var_89); 
-        void* var_92 = tensorHalfConvCutlass(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-        void* var_93 = tensorHalfBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-        void* var_94 = tensorHalfRelu(var_93); 
-        void* var_95 = tensorHalfConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-        void* var_96 = tensorHalfBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-        void* var_97 = tensorHalfRelu(var_96); 
-        void* var_99 = tensorHalfPooling(var_97,1,2,2,0,0,2,2); 
-        void* var_101 = tensorHalfGemmGPU(var_99, dense_1_w); 
-        void* var_102 = tensorHalfAdd(var_101, dense_1_b); 
-        void* var_103 = tensorSoftmax(var_102); 
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-        float accuracy = computeAccuracy2(labels, batch_size, var_103); 
-        final_accuracy += accuracy; 
-        freeBatchMemory(); 
-      }
-  }
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_half_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_half_cifar10_profiling.cc
deleted file mode 100644
index 1c6a3955b1ad644363947106bb0f77d6b9a77050..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_half_cifar10_profiling.cc
+++ /dev/null
@@ -1,438 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  startProfiling();
-  
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  int total_runs = 10;
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    profiler.resume_profiler();
-    void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_2 = tensorHalfRelu(var_1); 
-    void* var_4 = tensorHalfConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_6 = tensorHalfRelu(var_5); 
-    void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_9 = tensorHalfRelu(var_8); 
-    void* var_11 = tensorHalfConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-    void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_13 = tensorHalfRelu(var_12); 
-    void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_16 = tensorHalfRelu(var_15); 
-    void* var_18 = tensorHalfConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-    void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_20 = tensorHalfRelu(var_19); 
-    void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_23 = tensorHalfRelu(var_22); 
-    void* var_26 = tensorHalfConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-    void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_28 = tensorHalfRelu(var_27); 
-    void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_31 = tensorHalfRelu(var_30); 
-    void* var_33 = tensorHalfConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-    void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_35 = tensorHalfRelu(var_34); 
-    void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_38 = tensorHalfRelu(var_37); 
-    void* var_41 = tensorHalfConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-    void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_43 = tensorHalfRelu(var_42); 
-    void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_46 = tensorHalfRelu(var_45); 
-    void* var_48 = tensorHalfConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-    void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-    void* var_50 = tensorHalfRelu(var_49); 
-    void* var_51 = tensorHalfConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-    void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-    void* var_53 = tensorHalfRelu(var_52); 
-    void* var_55 = tensorHalfConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-    void* var_56 = tensorHalfBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-    void* var_57 = tensorHalfRelu(var_56); 
-    void* var_58 = tensorHalfConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-    void* var_59 = tensorHalfBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-    void* var_60 = tensorHalfRelu(var_59); 
-    void* var_63 = tensorHalfConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-    void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-    void* var_65 = tensorHalfRelu(var_64); 
-    void* var_66 = tensorHalfConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-    void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-    void* var_68 = tensorHalfRelu(var_67); 
-    void* var_70 = tensorHalfConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-    void* var_71 = tensorHalfBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-    void* var_72 = tensorHalfRelu(var_71); 
-    void* var_73 = tensorHalfConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-    void* var_74 = tensorHalfBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-    void* var_75 = tensorHalfRelu(var_74); 
-    void* var_77 = tensorHalfConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-    void* var_78 = tensorHalfBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-    void* var_79 = tensorHalfRelu(var_78); 
-    void* var_80 = tensorHalfConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-    void* var_81 = tensorHalfBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-    void* var_82 = tensorHalfRelu(var_81); 
-    void* var_85 = tensorHalfConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-    void* var_86 = tensorHalfBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-    void* var_87 = tensorHalfRelu(var_86); 
-    void* var_88 = tensorHalfConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-    void* var_89 = tensorHalfBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-    void* var_90 = tensorHalfRelu(var_89); 
-    void* var_92 = tensorHalfConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-    void* var_93 = tensorHalfBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-    void* var_94 = tensorHalfRelu(var_93); 
-    void* var_95 = tensorHalfConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-    void* var_96 = tensorHalfBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-    void* var_97 = tensorHalfRelu(var_96); 
-    void* var_99 = tensorHalfPooling(var_97,1,2,2,0,0,2,2); 
-    void* var_101 = tensorHalfGemmGPU(var_99, dense_1_w); 
-    void* var_102 = tensorHalfAdd(var_101, dense_1_b); 
-    void* var_103 = tensorSoftmax(var_102); 
-
-      profiler.pause_profiler();
-      auto time_energy = profiler.get_time_energy();
-      total_time += time_energy.first;
-      profiler.reset();
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_103); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-  
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_depthwise_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_depthwise_half_profiling.cc
deleted file mode 100644
index f68eb1793b66b0579f2ed6dbff26a56677f2aa95..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_depthwise_half_profiling.cc
+++ /dev/null
@@ -1,249 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 10;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  
-  llvm_hpvm_initTensorRt(0); 
-
-  //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/");
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/");
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-
-
-  float final_accuracy = 0.0;
-
-  for(int j = 0; j < total_runs; j++){    
-    for(int i = 0; i < batch_count; i++){ 
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-      void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorHalfRelu(var_1); 
-      void* var_4 = tensorHalfConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_5 = tensorHalfBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_6 = tensorHalfRelu(var_5); 
-      void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-      void* var_8 = tensorHalfBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_9 = tensorHalfRelu(var_8); 
-      void* var_11 = tensorHalfConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_12 = tensorHalfBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_13 = tensorHalfRelu(var_12); 
-      void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-      void* var_15 = tensorHalfBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_16 = tensorHalfRelu(var_15); 
-      void* var_18 = tensorHalfConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_20 = tensorHalfRelu(var_19); 
-      void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-      void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_23 = tensorHalfRelu(var_22); 
-      void* var_26 = tensorHalfConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_27 = tensorHalfBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_28 = tensorHalfRelu(var_27); 
-      void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-      void* var_30 = tensorHalfBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_31 = tensorHalfRelu(var_30); 
-      void* var_33 = tensorHalfConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_35 = tensorHalfRelu(var_34); 
-      void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-      void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_38 = tensorHalfRelu(var_37); 
-      void* var_41 = tensorHalfConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_42 = tensorHalfBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_43 = tensorHalfRelu(var_42); 
-      void* var_44 = tensorHalfConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-      void* var_45 = tensorHalfBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_46 = tensorHalfRelu(var_45); 
-      void* var_47 = tensorHalfPooling(var_46,1,2,2,0,0,2,2); 
-      void* var_49 = tensorHalfGemmGPU(var_47, dense_1_w); 
-      void* var_50 = tensorHalfAdd(var_49, dense_1_b); 
-      void* var_51 = tensorSoftmax(var_50); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    //final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy / batch_count); 
-  }
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy);
-
-  dumpExecutionAccuracies();
-    
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_half_profiling.cc
deleted file mode 100644
index c641db1a05efe44d4801da1ebdcaf2ae8945e7f2..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/mobilenet_shallow_half_profiling.cc
+++ /dev/null
@@ -1,225 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-	  for(int i = 0; i < batch_count; i++){ 
-
-		int start = i * batch_size; 
-		int end = (i + 1) * batch_size; 
-
-		void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-
-		void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-		void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-		void* var_2 = tensorHalfRelu(var_1); 
-		void* var_4 = tensorHalfConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-		void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-		void* var_6 = tensorHalfRelu(var_5); 
-		void* var_7 = tensorHalfConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-		void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-		void* var_9 = tensorHalfRelu(var_8); 
-		void* var_11 = tensorHalfConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-		void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-		void* var_13 = tensorHalfRelu(var_12); 
-		void* var_14 = tensorHalfConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-		void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-		void* var_16 = tensorHalfRelu(var_15); 
-		void* var_18 = tensorHalfConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); 
-		void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-		void* var_20 = tensorHalfRelu(var_19); 
-		void* var_21 = tensorHalfConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-		void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-		void* var_23 = tensorHalfRelu(var_22); 
-		void* var_26 = tensorHalfConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-		void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-		void* var_28 = tensorHalfRelu(var_27); 
-		void* var_29 = tensorHalfConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-		void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-		void* var_31 = tensorHalfRelu(var_30); 
-		void* var_33 = tensorHalfConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-		void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-		void* var_35 = tensorHalfRelu(var_34); 
-		void* var_36 = tensorHalfConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-		void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-		void* var_38 = tensorHalfRelu(var_37); 
-		void* var_40 = tensorHalfPooling(var_38,1,2,2,0,0,2,2); 
-		void* var_42 = tensorHalfGemmGPU(var_40, dense_1_w); 
-		void* var_43 = tensorHalfAdd(var_42, dense_1_b); 
-		void* var_44 = tensorSoftmax(var_43); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        profiler.reset();
-
-		uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-		float accuracy = computeAccuracy2(labels, batch_size, var_44); 
-		final_accuracy += accuracy; 
-		freeBatchMemory(); 
-	  } 
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/resnet18_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/resnet18_cifar10_half_profiling.cc
deleted file mode 100644
index f91814e8390a400159467298a3702147cbf2f4b3..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/resnet18_cifar10_half_profiling.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-  
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10; //100;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  Profiler profiler;
-  profiler.start_profiler();
-  double total_time = 0.0;
-
-  for (int itrs = 0; itrs < total_runs; itrs++){ 
-      for(int i = 0; i < batch_count; i++){
-
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-        profiler.resume_profiler();
-        
-        void* var_2 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-        void* var_3 = tensorHalfAdd(var_2, conv2d_1_b); 
-        void* var_4 = tensorHalfRelu(var_3); 
-        void* var_6 = tensorHalfConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-        void* var_7 = tensorHalfAdd(var_6, conv2d_2_b); 
-        void* var_8 = tensorHalfRelu(var_7); 
-        void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); 
-        void* var_12 = tensorHalfAdd(var_4, var_11); 
-        void* var_13 = tensorHalfRelu(var_12); 
-        void* var_15 = tensorHalfConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_16 = tensorHalfAdd(var_15, conv2d_4_b); 
-        void* var_17 = tensorHalfRelu(var_16); 
-        void* var_19 = tensorHalfConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_20 = tensorHalfAdd(var_19, conv2d_5_b); 
-        void* var_21 = tensorHalfAdd(var_13, var_20); 
-        void* var_22 = tensorHalfRelu(var_21); 
-        void* var_24 = tensorHalfConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-        void* var_25 = tensorHalfAdd(var_24, conv2d_6_b); 
-        void* var_26 = tensorHalfRelu(var_25); 
-        void* var_28 = tensorHalfConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-        void* var_29 = tensorHalfAdd(var_28, conv2d_7_b); 
-        void* var_30 = tensorHalfAdd(var_22, var_29); 
-        void* var_31 = tensorHalfRelu(var_30); 
-        void* var_33 = tensorHalfConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); 
-        void* var_34 = tensorHalfAdd(var_33, conv2d_8_b); 
-        void* var_35 = tensorHalfRelu(var_34); 
-        void* var_37 = tensorHalfConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-        void* var_38 = tensorHalfAdd(var_37, conv2d_9_b); 
-        void* var_40 = tensorHalfConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); 
-        void* var_41 = tensorHalfAdd(var_40, conv2d_10_b); 
-        void* var_42 = tensorHalfAdd(var_41, var_38); 
-        void* var_43 = tensorHalfRelu(var_42); 
-        void* var_45 = tensorHalfConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-        void* var_46 = tensorHalfAdd(var_45, conv2d_11_b); 
-        void* var_47 = tensorHalfRelu(var_46); 
-        void* var_49 = tensorHalfConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-        void* var_50 = tensorHalfAdd(var_49, conv2d_12_b); 
-        void* var_51 = tensorHalfAdd(var_43, var_50); 
-        void* var_52 = tensorHalfRelu(var_51); 
-        void* var_54 = tensorHalfConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-        void* var_55 = tensorHalfAdd(var_54, conv2d_13_b); 
-        void* var_56 = tensorHalfRelu(var_55); 
-        void* var_58 = tensorHalfConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); 
-        void* var_59 = tensorHalfAdd(var_58, conv2d_14_b); 
-        void* var_60 = tensorHalfAdd(var_52, var_59); 
-        void* var_61 = tensorHalfRelu(var_60); 
-        void* var_63 = tensorHalfConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); 
-        void* var_64 = tensorHalfAdd(var_63, conv2d_15_b); 
-        void* var_65 = tensorHalfRelu(var_64); 
-        void* var_67 = tensorHalfConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); 
-        void* var_68 = tensorHalfAdd(var_67, conv2d_16_b); 
-        void* var_70 = tensorHalfConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
-        void* var_71 = tensorHalfAdd(var_70, conv2d_17_b); 
-        void* var_72 = tensorHalfAdd(var_71, var_68); 
-        void* var_73 = tensorHalfRelu(var_72); 
-        void* var_75 = tensorHalfConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); 
-        void* var_76 = tensorHalfAdd(var_75, conv2d_18_b); 
-        void* var_77 = tensorHalfRelu(var_76); 
-        void* var_79 = tensorHalfConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); 
-        void* var_80 = tensorHalfAdd(var_79, conv2d_19_b); 
-        void* var_81 = tensorHalfAdd(var_73, var_80); 
-        void* var_82 = tensorHalfRelu(var_81); 
-        void* var_84 = tensorHalfConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); 
-        void* var_85 = tensorHalfAdd(var_84, conv2d_20_b); 
-        void* var_86 = tensorHalfRelu(var_85); 
-        void* var_88 = tensorHalfConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); 
-        void* var_89 = tensorHalfAdd(var_88, conv2d_21_b); 
-        void* var_90 = tensorHalfAdd(var_82, var_89); 
-        void* var_91 = tensorHalfRelu(var_90); 
-        void* var_92 = tensorHalfPooling(var_91,1,8,8,0,0,8,8); 
-        void* var_94 = tensorHalfGemmGPU(var_92, dense_1_w); 
-        void* var_95 = tensorHalfAdd(var_94, dense_1_b); 
-        void* var_96 = tensorSoftmax(var_95); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-        final_accuracy += accuracy;
-        
-        freeBatchMemory();
-      }
-  }
-  stopProfiling();
-
-  profiler.stop_profiler();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar100_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar100_half_profiling.cc
deleted file mode 100644
index b778b1720c8a2db2f90230c3e57d0e0928f8665b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar100_half_profiling.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-  double total_time = 0.0;
-
-  for (int i = 0; i < total_runs; i++){
-	  for(int i = 0; i < batch_count; i++){ 
-
-		int start = i * batch_size; 
-		int end = (i + 1) * batch_size; 
-
-		void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-
-		void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-		void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); 
-		void* var_2 = tensorHalfRelu(var_1); 
-		void* var_4 = tensorHalfConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-		void* var_5 = tensorHalfAdd(var_4, conv2d_2_b); 
-		void* var_6 = tensorHalfRelu(var_5); 
-		void* var_7 = tensorHalfPooling(var_6,0,2,2,0,0,2,2); 
-		void* var_8 = tensorHalfConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-		void* var_9 = tensorHalfAdd(var_8, conv2d_3_b); 
-		void* var_10 = tensorHalfRelu(var_9); 
-		void* var_12 = tensorHalfConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-		void* var_13 = tensorHalfAdd(var_12, conv2d_4_b); 
-		void* var_14 = tensorHalfRelu(var_13); 
-		void* var_15 = tensorHalfPooling(var_14,0,2,2,0,0,2,2); 
-		void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-		void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); 
-		void* var_18 = tensorHalfRelu(var_17); 
-		void* var_20 = tensorHalfConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-		void* var_21 = tensorHalfAdd(var_20, conv2d_6_b); 
-		void* var_22 = tensorHalfRelu(var_21); 
-		void* var_24 = tensorHalfConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-		void* var_25 = tensorHalfAdd(var_24, conv2d_7_b); 
-		void* var_26 = tensorHalfRelu(var_25); 
-		void* var_27 = tensorHalfPooling(var_26,0,2,2,0,0,2,2); 
-		void* var_28 = tensorHalfConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-		void* var_29 = tensorHalfAdd(var_28, conv2d_8_b); 
-		void* var_30 = tensorHalfRelu(var_29); 
-		void* var_32 = tensorHalfConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-		void* var_33 = tensorHalfAdd(var_32, conv2d_9_b); 
-		void* var_34 = tensorHalfRelu(var_33); 
-		void* var_36 = tensorHalfConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-		void* var_37 = tensorHalfAdd(var_36, conv2d_10_b); 
-		void* var_38 = tensorHalfRelu(var_37); 
-		void* var_39 = tensorHalfPooling(var_38,0,2,2,0,0,2,2); 
-		void* var_40 = tensorHalfConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-		void* var_41 = tensorHalfAdd(var_40, conv2d_11_b); 
-		void* var_42 = tensorHalfRelu(var_41); 
-		void* var_44 = tensorHalfConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-		void* var_45 = tensorHalfAdd(var_44, conv2d_12_b); 
-		void* var_46 = tensorHalfRelu(var_45); 
-		void* var_48 = tensorHalfConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-		void* var_49 = tensorHalfAdd(var_48, conv2d_13_b); 
-		void* var_50 = tensorHalfRelu(var_49); 
-		void* var_51 = tensorHalfPooling(var_50,0,2,2,0,0,2,2); 
-		void* var_54 = tensorHalfGemmGPU(var_51, dense_1_w); 
-		void* var_55 = tensorHalfAdd(var_54, dense_1_b); 
-		void* var_56 = tensorHalfRelu(var_55); 
-		void* var_58 = tensorHalfGemmGPU(var_56, dense_2_w); 
-		void* var_59 = tensorHalfAdd(var_58, dense_2_b); 
-		void* var_60 = tensorSoftmax(var_59); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        profiler.reset();
-
-		uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-		float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); 
-		final_accuracy += accuracy; 
-		freeBatchMemory(); 
-	 
-	  }
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar10_half_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar10_half_profiling.cc
deleted file mode 100644
index 3f97e5dbde3b6d124888a8c74d435880097a394c..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/profiling/vgg16_cifar10_half_profiling.cc
+++ /dev/null
@@ -1,189 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h>
-
-#include "../../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../../include/utils.h"
-
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  for (int itrs = 0; itrs < total_runs; itrs++){
-      for(int i = 0; i < batch_count; i++){
-
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); 
-    
-        profiler.resume_profiler();
- 
-        void* var_0 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-        void* var_1 = tensorHalfAdd(var_0, conv2d_1_b); 
-        void* var_2 = tensorHalfRelu(var_1); 
-        void* var_4 = tensorHalfConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-        void* var_5 = tensorHalfAdd(var_4, conv2d_2_b); 
-        void* var_6 = tensorHalfRelu(var_5); 
-        void* var_7 = tensorHalfPooling(var_6,0,2,2,0,0,2,2); 
-        void* var_8 = tensorHalfConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_9 = tensorHalfAdd(var_8, conv2d_3_b); 
-        void* var_10 = tensorHalfRelu(var_9); 
-        void* var_12 = tensorHalfConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_13 = tensorHalfAdd(var_12, conv2d_4_b); 
-        void* var_14 = tensorHalfRelu(var_13); 
-        void* var_15 = tensorHalfPooling(var_14,0,2,2,0,0,2,2); 
-        void* var_16 = tensorHalfConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_17 = tensorHalfAdd(var_16, conv2d_5_b); 
-        void* var_18 = tensorHalfRelu(var_17); 
-        void* var_20 = tensorHalfConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-        void* var_21 = tensorHalfAdd(var_20, conv2d_6_b); 
-        void* var_22 = tensorHalfRelu(var_21); 
-        void* var_24 = tensorHalfConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-        void* var_25 = tensorHalfAdd(var_24, conv2d_7_b); 
-        void* var_26 = tensorHalfRelu(var_25); 
-        void* var_27 = tensorHalfPooling(var_26,0,2,2,0,0,2,2); 
-        void* var_28 = tensorHalfConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-        void* var_29 = tensorHalfAdd(var_28, conv2d_8_b); 
-        void* var_30 = tensorHalfRelu(var_29); 
-        void* var_32 = tensorHalfConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-        void* var_33 = tensorHalfAdd(var_32, conv2d_9_b); 
-        void* var_34 = tensorHalfRelu(var_33); 
-        void* var_36 = tensorHalfConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-        void* var_37 = tensorHalfAdd(var_36, conv2d_10_b); 
-        void* var_38 = tensorHalfRelu(var_37); 
-        void* var_39 = tensorHalfPooling(var_38,0,2,2,0,0,2,2); 
-        void* var_40 = tensorHalfConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-        void* var_41 = tensorHalfAdd(var_40, conv2d_11_b); 
-        void* var_42 = tensorHalfRelu(var_41); 
-        void* var_44 = tensorHalfConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-        void* var_45 = tensorHalfAdd(var_44, conv2d_12_b); 
-        void* var_46 = tensorHalfRelu(var_45); 
-        void* var_48 = tensorHalfConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-        void* var_49 = tensorHalfAdd(var_48, conv2d_13_b); 
-        void* var_50 = tensorHalfRelu(var_49); 
-        void* var_51 = tensorHalfPooling(var_50,0,2,2,0,0,2,2); 
-        void* var_54 = tensorHalfGemmGPU(var_51, dense_1_w); 
-        void* var_55 = tensorHalfAdd(var_54, dense_1_b); 
-        void* var_56 = tensorHalfRelu(var_55); 
-        void* var_58 = tensorHalfGemmGPU(var_56, dense_2_w); 
-        void* var_59 = tensorHalfAdd(var_58, dense_2_b); 
-        void* var_60 = tensorSoftmax(var_59); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_60); 
-        final_accuracy += accuracy;
-
-        freeBatchMemory();
-      }
-  }
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  profiler.stop_profiler();
-  // Start power and performance profiling 
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc
index 9779b95d865d1939244f50c3910d7ed770b0729d..741c4a443cc9a56c443ec5858aaed5a7d5705268 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/resnet18_cifar10_half.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(0); 
   
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/resnet18_cifar10/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc
index 7107defe9d154731a46efaf5c8ad244ceb69bad7..9ac1deea68c693f8baf2df2d9f2b626b3597ad7f 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar100_half.cc
@@ -13,7 +13,7 @@ int main(){
 
     llvm_hpvm_initTensorRt(0); 
 
-    std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); 
+    std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); 
     std::string input_path =  dir_prefix + std::string("input.bin"); 
     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc
index 45e74fbe32e053e2d43c1dde0f90460c21ab0118..f92bac10e27162fe0bc59c07aa4f9ede542ccd6e 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp16/vgg16_cifar10_half.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(0); 
 
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
   std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_canny.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_canny.cc
deleted file mode 100644
index 628ce6616cde37a5eddde5ab6049001525203580..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_canny.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h>
-#include <vector>
-#include <string.h> 
-#include "tensor_runtime.h" 
-#include "utils.h" 
-
-#include "tensor_custom_ops_cpu.h"
-
-
-
-
-Tensor* gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-std::pair<Tensor*, Tensor*> getSobelKernels() {
-  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-  auto *t1 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-  auto *t2 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-  return std::make_pair(t1, t2);
-}
-
-/*** 
-
-TODOs:
-
-* Precision calculation?
-* tensorArgMax?
-* tensorSelect?
-* tensorContract
-* autotuning support for these functions
-* FP32 vs F16 versions of sampling perforation?
-* Need tensorRT version and a PROMISE API version
-* How to Profile? are profileEvent calls added
-* Pytorch version
-
-
-****/
-
-void* canny_filter(void* dataset) {
-
-  Tensor* gaussian = gaussianFilter(1.4, 5, 5, 1);
-  Tensor* kernel_x, *kernel_y;
-  std::tie(kernel_x, kernel_y) = getSobelKernels();
-
-  // 0. Grayscale
-  auto* summed_image = tensorReduce(dataset, 1, MathOp::Add);
-  auto* grayscale_image = tensorMap1(MathOp::Avg3, summed_image);
-  // 1. Denoise
-
-  auto* image2 = tensorConvolution(grayscale_image, gaussian,
-				   2, 2, // padding
-				   1, 1, // strides
-				   1, 0); // conv_mode, conv_groups
-				    
-  // 2. Get edge gradient / direction
-  auto *grad_x = tensorConvolution(image2, kernel_x,
-				   1, 1,
-				   1, 1,
-				   1, 0);
-   
-  auto *grad_y = tensorConvolution(image2, kernel_y,
-				   1, 1,
-				   1, 1,
-				   1, 0);
- 
-  auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y);
-  // 2.5. Normalize grad magnitude
-  auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max);
-  auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max);
-  auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max);
-  return grad_mag_norm;
-}
-
-
-
-
-void* invoke_canny(void* input) {
-  
-  auto* result = canny_filter(input);
-
-  printf("Done with Canny \n");
-  
-  return result;
-}
-
-
-
-
-
-
-
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string canny_input_path =  dir_prefix + std::string("canny_input.bin");
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-
-  int test_input_size = 5000;
-  int batch_size = 500;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();  
-  startMemTracking();
-
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-
-
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    void* canny_input = readInputBatch(canny_input_path.c_str(), 0,start,end, 3, 128, 128);
-
-    void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv1out, conv1_bias); 
-    void* conv1_tanh = tensorTanh(conv1out);
-    
-    // 2nd Layer
-    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv2out, conv2_bias); 
-    void* conv2_tanh = tensorTanh(conv2out);
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-     
-    // 3rd Layer
-    void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv3out, conv3_bias); 
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    // 4th Layer
-    void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv4out, conv4_bias); 
-    void* conv4_tanh = tensorTanh(conv4out);
-    void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-    // 5th Layer
-    void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv5out, conv5_bias); 
-    void* conv5_tanh = tensorTanh(conv5out);
-
-    // 6th Layer
-    void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv6out, conv6_bias); 
-  
-    void* conv6_tanh = tensorTanh(conv6out);
-    void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-    // final FC Layer
-    void* gemm1out = tensorGemmGPU(pool6out, fc1_weights);  
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-    void* result = tensorSoftmax(gemm1biasout);
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, result); 
-    final_accuracy += accuracy;
-
-
-    std::vector<int> index_vector;
-    index_vector.push_back(1);
-    index_vector.push_back(2);
-    index_vector.push_back(3);
-    index_vector.push_back(4);
-    index_vector.push_back(5);
-    
-    
-    void* argmax_out = tensorArgMax(result);
-    void* select_out = tensorSelect2(argmax_out, index_vector);
-    void* reduced_input = tensorContract(canny_input, select_out);
-
-    invoke_canny(reduced_input);
-    
-
-    freeBatchMemory();    
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc
index 5918f4f18ebdb7d4f2fa3e37c0982b8ed8d10932..50d9747f990d486c4543607d16d4a4ccb88b0517 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet2_cifar10.cc
@@ -19,7 +19,7 @@ void testCifarNet(){
   printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
  
 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
+  std::string dir_prefix = model_params_path +  std::string("/alexnet2_cifar10/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin");
   std::string labels32_path =  dir_prefix + std::string("labels32.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc
index 8129fbfafcdd3e991e67d33fd3013e1700da45c5..1a76f1ae8ba6059124117b82cd72e8ccd6cdeba6 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/alexnet_cifar10.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(0); 
 
-  std::string dir_prefix = std::string("../model_params/alexnet_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/alexnet_cifar10/"); 
 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin");
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc
index c047ffe090a93711cb66973ef6622d46fccdcee3..7508f3119eeb469a164fad9741000308e3e8c031 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/lenet_mnist.cc
@@ -22,7 +22,7 @@ void testLenetTanh(){
 
   int test_batch_size = 5000;
 
-  std::string dir_prefix = std::string("../model_params/lenet_mnist/");   
+  std::string dir_prefix = model_params_path + std::string("/lenet_mnist/");   
 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc
index 78ca2dac98435ea146da44a78bb2f7405af8c5ef..7c311a568647caa107112bed4982fb57254dc7b3 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet.cc
@@ -15,7 +15,7 @@ int main(){
   llvm_hpvm_initTensorRt(0); 
 
 
-  std::string dir_prefix = std::string("../model_params/mobilenet/"); 
+  std::string dir_prefix = model_params_path + std::string("/mobilenet/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
   std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet_shallow.cc
deleted file mode 100644
index d30518216f76160e183a915a6e6da2018239ab60..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/mobilenet_shallow.cc
+++ /dev/null
@@ -1,240 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 1;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); 
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 2500; 
-  int batch_count = test_input_size / batch_size; 
-
-
-  for(int j = 0; j < total_runs; j++){    
-    float final_accuracy = 0.0;    
-    for(int i = 0; i < batch_count; i++){ 
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_6 = tensorRelu(var_5); 
-      void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-      void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_9 = tensorRelu(var_8); 
-      void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_13 = tensorRelu(var_12); 
-      void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-      void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_16 = tensorRelu(var_15); 
-      void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_28 = tensorRelu(var_27); 
-      void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-      void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_31 = tensorRelu(var_30); 
-      void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_43 = tensorRelu(var_42); 
-      void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-      void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_46 = tensorRelu(var_45); 
-      void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); 
-      void* var_49 = tensorGemmGPU(var_47, dense_1_w); 
-      void* var_50 = tensorAdd(var_49, dense_1_b); 
-      void* var_51 = tensorSoftmax(var_50); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  dumpExecutionAccuracies();
-    
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc
index b0c868085bae1abc2025364609114cc21c7d213a..87b8cd4156ed8d7f882ff7642420c995cd7c3a0f 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet18_cifar10.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(1); 
   
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/resnet18_cifar10/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc
index 1192d04de200c8e8183c35861da2d04aa705e955..0914b3f70c353ee7e56c39ccf52f21914618301e 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/resnet50_imagenet.cc
@@ -15,7 +15,7 @@ int main(){
   llvm_hpvm_initTensorRt(0); 
 
 
-  std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet_tune/"); 
+  std::string dir_prefix = model_params_path + std::string("/resnet50_imagenet/"); // subdirectory of model_params_path, not an absolute /shared/... path
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
   std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc
index e8469e8a4892f51337118e4699f09ae98c13bf71..a6dc7cbc11cf77357a749bff117489fc4b292941 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar10.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(0); 
 
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar10/"); 
+  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar10/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin"); 
   std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc
index 0290a2782880c1aa8c1ea33f5564926665d968d6..2539f8d8722909724a9dc2890e82f4f98853f5cd 100644
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc
+++ b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100.cc
@@ -13,7 +13,7 @@ int main(){
 
   llvm_hpvm_initTensorRt(0); 
 
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100/"); 
+  std::string dir_prefix = model_params_path + std::string("/vgg16_cifar100/"); 
   std::string input_path =  dir_prefix + std::string("input.bin"); 
   std::string labels_path =  dir_prefix + std::string("labels.bin");
   
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100_5.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100_5.cc
deleted file mode 100644
index 3ee273d70aea6d74cfa55f250e999b05506f9b21..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/fp32/vgg16_cifar100_5.cc
+++ /dev/null
@@ -1,167 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-  //std::string input_path =  dir_prefix + std::string("vgg16_cifar100_calib.bin"); 
-  //std::string labels_path =  dir_prefix + std::string("vgg16_cifar100_train_labels.bin");
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin");
-  
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 2500;
-  int offset = 5000;
-  
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size + offset; 
-    int end = (i + 1) * batch_size + offset; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-    void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-    void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-    void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-    void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-    void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-    void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-    void* var_18 = tensorRelu(var_17); 
-    void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-    void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-    void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-    void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-    void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-    void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-    void* var_34 = tensorRelu(var_33); 
-    void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-    void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-    void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-    void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-    void* var_42 = tensorRelu(var_41); 
-    void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-    void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-    void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-    void* var_50 = tensorRelu(var_49); 
-    void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-    void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-    void* var_55 = tensorAdd(var_54, dense_1_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-    void* var_59 = tensorAdd(var_58, dense_2_b); 
-    void* var_60 = tensorSoftmax(var_59); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    //float accuracy = computeAccuracy2(labels, batch_size, var_60, 100);
-    float accuracy = computeTop5Accuracy(labels, batch_size, var_60, 100);
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py b/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py
index 8d13a292372d81d491aedf21341c0e51859be723..11c3584a41e272527bc8141d9e9a9ed2d22ab51b 100644
--- a/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py
+++ b/hpvm/projects/hpvm-tensor-rt/scripts/test_dnns.py
@@ -28,9 +28,8 @@ def createBaselineConfig(f_path, base_flag, num_layers):
         
 if __name__ == "__main__":
 
-    FP32_binary_paths = ["alexnet_cifar10", "alexnet2_cifar10", "resnet18_cifar10", "vgg16_cifar10", "vgg16_cifar100", "lenet_mnist", "mobilenet", "mobilenet_shallow"]
-    FP16_binary_paths = ["alexnet_half", "alexnet2_half", "resnet18_half", "vgg16_cifar10_half", "vgg16_cifar100_half", "lenet_half", "mobilenet_half", "mobilenet_shallow_half"]
-    PROMISE_binary_paths = ["alexnet_promise", "alexnet2_promise", "resnet18_promise", "vgg16_cifar10_promise", "vgg16_cifar100_promise", "mobilenet_promise", "mobilenet_shallow_promise"]
+    FP32_binary_paths = ["alexnet_cifar10_fp32", "alexnet2_cifar10_fp32", "resnet18_cifar10_fp32", "vgg16_cifar10_fp32", "vgg16_cifar100_fp32", "lenet_mnist_fp32", "mobilenet_cifar10_fp32"]
+    FP16_binary_paths = ["alexnet_cifar10_fp16", "alexnet2_cifar10_fp16", "resnet18_cifar10_fp16", "vgg16_cifar10_fp16", "vgg16_cifar100_fp16", "lenet_mnist_fp16", "mobilenet_cifar10_fp16"]
 
     fp32_results = {}
     for binary_path in FP32_binary_paths:
@@ -46,15 +45,5 @@ if __name__ == "__main__":
         fp16_results[binary_path] = accuracy
 
 
-    createBaselineConfig("promise_flags", 11, 1000)
-    promise_results = {}
-    for binary_path in PROMISE_binary_paths:
-        subprocess.call("./" + binary_path)
-        accuracy = readAccuracy("final_accuracy")
-        promise_results[binary_path] = accuracy
-
-
     printResults(fp32_results)
     printResults(fp16_results)
-    printResults(promise_results)
-