diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
index 71df5b9d38dbaac28437c4edd2b15d3e4e6fa84f..2cd364bf6b184aa271d9916fc6900871a57bd039 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
@@ -129,7 +129,7 @@ int main(int argc, char* argv[]){
       if(shouldDumpClassConf){
 	int relative_start = start - offset;
 	int relative_end = end - offset;
-        copyClassConfsAndLabels(var_6, classConfs, predictedLabels, relative_start, relative_end);
+        copyClassConfsAndLabels(var_7, classConfs, predictedLabels, relative_start, relative_end);
       }
 
       freeBatchMemory(); 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
index cbfc534681efdea9967bd6d0096572e2bad87c16..691c97a3ef4159fd61df5bcabf8adc14fd28eabd 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
@@ -6,8 +6,8 @@
 #include <sys/types.h> 
 #include <sys/stat.h> 
 #include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
+#include "tensor_runtime.h" 
+#include "utils.h" 
 
 
 int total_runs = 1;
@@ -41,7 +41,17 @@ int main(int argc, char* argv[]){
     offset = atoi(argv[5]);   
   }
 
-    
+  bool shouldDumpClassConf = false;
+  float* classConfs;
+  int* predictedLabels;
+  if(argc > 6){
+    shouldDumpClassConf = true;
+    classConfs = (float*) malloc(sizeof(float) * test_input_size);
+    predictedLabels = (int*) malloc(sizeof(int) * test_input_size);
+  }
+
+
+  
   llvm_hpvm_initTensorRt(1); 
 
 
@@ -58,12 +68,14 @@ int main(int argc, char* argv[]){
     int batch_count = test_input_size / batch_size; 
     float final_accuracy = 0.0; 
 
-    for(int i = 0; i < batch_count; i++){ 
 
+    std::string dir_prefix = std::string("../model_params/mobilenet/"); 
+    std::string input_path =  dir_prefix + std::string("input.bin"); 
+    std::string labels_path =  dir_prefix + std::string("labels.bin"); 
+    std::string labels32_path =  dir_prefix + std::string("labels32.bin"); 
+
+    for(int i = 0; i < batch_count; i++){ 
 
-      std::string dir_prefix = std::string("../model_params/mobilenet/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
       std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
       void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
       std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
@@ -433,9 +445,16 @@ int main(int argc, char* argv[]){
       uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
 
       float accuracy = computeAccuracy2(labels, batch_size, var_83); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
+      final_accuracy += accuracy;
+
+      if(shouldDumpClassConf){
+	int relative_start = start - offset;
+	int relative_end = end - offset;
+        copyClassConfsAndLabels(var_83, classConfs, predictedLabels, relative_start, relative_end);
+      }
+
+      
+      freeBatchMemory();  
     }
 
     final_accuracy = final_accuracy / batch_count; 
@@ -443,6 +462,15 @@ int main(int argc, char* argv[]){
 
     if (final_accuracy < bench_acc)
      missed += 1;
+
+
+    if(shouldDumpClassConf){
+      int labels_start = offset;
+      int labels_end = offset + test_input_size;
+      uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end);
+      dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size);
+    }
+
   }
 
   dumpExecutionAccuracies(); 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
index 7a8136d7d3f66e971f010f17bda1b78dde8ee181..89b3697298f896c0975b7d6e458e88fe7db96ca1 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
@@ -6,8 +6,8 @@
 #include <sys/types.h> 
 #include <sys/stat.h> 
 #include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
+#include "tensor_runtime.h" 
+#include "utils.h" 
 
 
 int total_runs = 1;
@@ -41,6 +41,16 @@ int main(int argc, char* argv[]){
     offset = atoi(argv[5]);   
   }
 
+  bool shouldDumpClassConf = false;
+  float* classConfs;
+  int* predictedLabels;
+  if(argc > 6){
+    shouldDumpClassConf = true;
+    classConfs = (float*) malloc(sizeof(float) * test_input_size);
+    predictedLabels = (int*) malloc(sizeof(int) * test_input_size);
+  }
+
+  
   
   llvm_hpvm_initTensorRt(1); 
 
@@ -56,11 +66,13 @@ int main(int argc, char* argv[]){
     int batch_count = test_input_size / batch_size; 
     float final_accuracy = 0.0; 
 
+    std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); 
+    std::string input_path =  dir_prefix + std::string("input.bin"); 
+    std::string labels_path =  dir_prefix + std::string("labels.bin");
+    std::string labels32_path =  dir_prefix + std::string("labels32.bin"); 
+
     for(int i = 0; i < batch_count; i++){ 
 
-      std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
       std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
       void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
       std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
@@ -248,7 +260,15 @@ int main(int argc, char* argv[]){
       uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
 
       float accuracy = computeAccuracy2(labels, batch_size, var_41); 
-      final_accuracy += accuracy; 
+      final_accuracy += accuracy;
+
+      if(shouldDumpClassConf){
+	int relative_start = start - offset;
+	int relative_end = end - offset;
+        copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end);
+      }
+
+      
       freeBatchMemory(); 
  
     }
@@ -259,6 +279,15 @@ int main(int argc, char* argv[]){
 
     if (final_accuracy < bench_acc)
       missed += 1;
+
+    
+    if(shouldDumpClassConf){
+      int labels_start = offset;
+      int labels_end = offset + test_input_size;
+      uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end);
+      dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size);
+    }
+
   }
 
   dumpExecutionAccuracies(); 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
index b229fc9c2b81703c2d29039480297192a0a3c746..21c7b051c24c9c053041f6cf4d63e8ace3b3645d 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
@@ -6,8 +6,8 @@
 #include <sys/types.h> 
 #include <sys/stat.h> 
 #include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
+#include "tensor_runtime.h" 
+#include "utils.h" 
 
 
 int total_runs = 1;
@@ -42,6 +42,16 @@ int main(int argc, char* argv[]){
   }
 
   
+  bool shouldDumpClassConf = false;
+  float* classConfs;
+  int* predictedLabels;
+  if(argc > 6){
+    shouldDumpClassConf = true;
+    classConfs = (float*) malloc(sizeof(float) * test_input_size);
+    predictedLabels = (int*) malloc(sizeof(int) * test_input_size);
+  }
+
+  
 
   llvm_hpvm_initTensorRt(1); 
 
@@ -57,11 +67,14 @@ int main(int argc, char* argv[]){
     int batch_count = test_input_size / batch_size; 
     float final_accuracy = 0.0; 
 
+
+    std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/");	   
+    std::string input_path =  dir_prefix + std::string("input.bin"); 
+    std::string labels_path =  dir_prefix + std::string("labels.bin");
+    std::string labels32_path =  dir_prefix + std::string("labels32.bin");
+    
     for(int i = 0; i < batch_count; i++){ 
 
-      std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/");	   
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
       std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
       void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
       std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
@@ -202,7 +215,16 @@ int main(int argc, char* argv[]){
       uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
 
       float accuracy = computeAccuracy2(labels, batch_size, var_41); 
-      final_accuracy += accuracy; 
+      final_accuracy += accuracy;
+
+
+      if(shouldDumpClassConf){
+	int relative_start = start - offset;
+	int relative_end = end - offset;
+        copyClassConfsAndLabels(var_41, classConfs, predictedLabels, relative_start, relative_end);
+      }
+
+      
       freeBatchMemory(); 
  
     }
@@ -213,6 +235,15 @@ int main(int argc, char* argv[]){
 
     if (final_accuracy < bench_acc)
       missed += 1;
+
+
+    if(shouldDumpClassConf){
+      int labels_start = offset;
+      int labels_end = offset + test_input_size;
+      uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end);
+      dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size);
+    }
+
   }
 
   dumpExecutionAccuracies(); 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
index ed11c5158d63c9e07188200e28cbfe32f08a87b2..27bc38e0e336d970f3f3096bdcc447e29e8b9a33 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
@@ -6,8 +6,8 @@
 #include <sys/types.h> 
 #include <sys/stat.h> 
 #include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
+#include "tensor_runtime.h" 
+#include "utils.h" 
 
 
 
@@ -44,6 +44,17 @@ int main(int argc, char* argv[]){
   }
 
 
+  bool shouldDumpClassConf = false;
+  float* classConfs;
+  int* predictedLabels;
+  if(argc > 6){
+    shouldDumpClassConf = true;
+    classConfs = (float*) malloc(sizeof(float) * test_input_size);
+    predictedLabels = (int*) malloc(sizeof(int) * test_input_size);
+  }
+
+  
+
   llvm_hpvm_initTensorRt(1); 
 
   
@@ -59,12 +70,14 @@ int main(int argc, char* argv[]){
 
    int batch_count = test_input_size / batch_size; 
    float final_accuracy = 0.0; 
-   
+
+   std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
+   std::string input_path =  dir_prefix + std::string("input.bin"); 
+   std::string labels_path =  dir_prefix + std::string("labels.bin");
+   std::string labels32_path =  dir_prefix + std::string("labels32.bin");   
+
    for(int i = 0; i < batch_count; i++){
      
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
@@ -153,9 +166,16 @@ int main(int argc, char* argv[]){
      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
 
      float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); 
-     final_accuracy += accuracy; 
-     freeBatchMemory(); 
- 
+     final_accuracy += accuracy;
+
+
+     if(shouldDumpClassConf){
+	int relative_start = start - offset;
+	int relative_end = end - offset;
+        copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end);
+     }
+
+     freeBatchMemory();  
    }
 
    final_accuracy = final_accuracy / batch_count; 
@@ -164,6 +184,15 @@ int main(int argc, char* argv[]){
 
    if (final_accuracy < bench_acc)
      missed += 1;
+
+
+   if(shouldDumpClassConf){
+      int labels_start = offset;
+      int labels_end = offset + test_input_size;
+      uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end);
+      dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size);
+    }
+
  }
 
  dumpExecutionAccuracies(); 
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
index 4398e721f9f7bebd28e54d1d5e682b712a159f8e..c66de402d86f406b141738b2c0652c5084af81b6 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
@@ -6,8 +6,8 @@
 #include <sys/types.h> 
 #include <sys/stat.h> 
 #include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
+#include "tensor_runtime.h" 
+#include "utils.h" 
 
 
 int total_runs = 1;
@@ -17,31 +17,42 @@ int to_skip = 5;
 
 int main(int argc, char* argv[]){ 
 
- int test_input_size = 3000; 
- int batch_size = 1000;
- int offset = 5000;
+  int test_input_size = 3000; 
+  int batch_size = 1000;
+  int offset = 5000;
 
  
- if (argc > 1){
-   total_runs = atoi(argv[1]);
- }
+  if (argc > 1){
+    total_runs = atoi(argv[1]);
+  }
 
- if (argc > 2){
-   bench_acc = atof(argv[2]);
- }
+  if (argc > 2){
+    bench_acc = atof(argv[2]);
+  }
 
- if(argc > 3){
-   to_skip = atoi(argv[3]);   
- }
+  if(argc > 3){
+    to_skip = atoi(argv[3]);   
+  }
 
- if(argc > 4){
-   test_input_size = atoi(argv[4]);   
- }
+  if(argc > 4){
+    test_input_size = atoi(argv[4]);   
+  }
 
- if(argc > 5){
-   offset = atoi(argv[5]);   
- }
+  if(argc > 5){
+    offset = atoi(argv[5]);   
+  }
 
+ 
+  bool shouldDumpClassConf = false;
+  float* classConfs;
+  int* predictedLabels;
+  if(argc > 6){
+    shouldDumpClassConf = true;
+    classConfs = (float*) malloc(sizeof(float) * test_input_size);
+    predictedLabels = (int*) malloc(sizeof(int) * test_input_size);
+  }
+
+ 
 
  llvm_hpvm_initTensorRt(1); 
 
@@ -57,13 +68,14 @@ int main(int argc, char* argv[]){
    
    int batch_count = test_input_size / batch_size; 
    float final_accuracy = 0.0; 
-
+   
+   std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/");       
+   std::string input_path =  dir_prefix + std::string("input.bin"); 
+   std::string labels_path =  dir_prefix + std::string("labels.bin");
+   std::string labels32_path =  dir_prefix + std::string("labels32.bin"); 
+   
    for(int i = 0; i < batch_count; i++){ 
 
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/");
-       
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
@@ -151,7 +163,16 @@ int main(int argc, char* argv[]){
      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
 
      float accuracy = computeAccuracy2(labels, batch_size, var_15); 
-     final_accuracy += accuracy; 
+     final_accuracy += accuracy;
+
+
+     if(shouldDumpClassConf){
+	int relative_start = start - offset;
+	int relative_end = end - offset;
+        copyClassConfsAndLabels(var_15, classConfs, predictedLabels, relative_start, relative_end);
+     }
+
+     
      freeBatchMemory(); 
  
    }
@@ -162,6 +183,15 @@ int main(int argc, char* argv[]){
 
    if (final_accuracy < bench_acc)
      missed += 1;
+
+
+   if(shouldDumpClassConf){
+      int labels_start = offset;
+      int labels_end = offset + test_input_size;
+      uint32_t* goldLabels = readLabelsBatch3(labels32_path.c_str(), labels_start, labels_end);
+      dumpClassConfsAndLabels(classConfs, predictedLabels, goldLabels, test_input_size);
+   }
+
  }