diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
index 55daa05dc698361f40390b0cf1e20bef7593057d..eeca0ed8ed8ed407b9c84592b22820857678b311 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
@@ -7,8 +7,8 @@
 # Batch 12: Error Sens: 10, 25, 35, for Loss1, 2, 3, respectively, Min: P3. 1000 Runs for All
 # Batch 13: No Error Sens: Equal Runs (1000) for all. Min: P1
 # Batch 14: Reruning Batch12 with bugFix!
-# Batch 15: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3
-
+# Batch 16: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3
+# Batch 17: Baseline with 3000 runs. Compare with Batch16
 
 
 class Benchmark:
@@ -46,9 +46,9 @@ Alexnet1.skip_layers = 0
 Alexnet1.skip_layer_str = "5_0"
 
 Alexnet1.base_dir = "../build_tuner/tuner_results/alexnet_cifar10/"
-Alexnet1.result_dir_1 = "../build_tuner/tuner_results/alexnet_cifar10/loss_1/batch15"
-Alexnet1.result_dir_2 = "../build_tuner/tuner_results/alexnet_cifar10/loss_2/batch15"
-Alexnet1.result_dir_3 = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15"
+Alexnet1.result_dir_1 = "../build_tuner/tuner_results/alexnet_cifar10/loss_1/batch17"
+Alexnet1.result_dir_2 = "../build_tuner/tuner_results/alexnet_cifar10/loss_2/batch17"
+Alexnet1.result_dir_3 = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch17"
 
 Alexnet1.tensor_desc_file = "tuner_results/alexnet_cifar10/alexnet_tensors.txt"
 Alexnet1.layer_file = "tuner_results/alexnet_cifar10/alexnet_layers.txt"
@@ -79,9 +79,9 @@ Alexnet2.start_promise_range = 1
 Alexnet2.skip_layer_str = "6_1_0"
 
 Alexnet2.base_dir = "../build_tuner/tuner_results/alexnet2_cifar10/"
-Alexnet2.result_dir_1 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_1/batch15"
-Alexnet2.result_dir_2 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_2/batch15"
-Alexnet2.result_dir_3 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_3/batch15"
+Alexnet2.result_dir_1 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_1/batch17"
+Alexnet2.result_dir_2 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_2/batch17"
+Alexnet2.result_dir_3 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_3/batch17"
 Alexnet2.tensor_desc_file = "tuner_results/alexnet2_cifar10/alexnet2_tensors.txt"
 Alexnet2.layer_file = "tuner_results/alexnet2_cifar10/alexnet2_layers.txt"
 Alexnet2.cost_file = "../build_tuner/tuner_results/alexnet2_cifar10/op_cost.txt"
@@ -109,9 +109,9 @@ Alexnet3.start_promise_range = 1
 Alexnet3.skip_layer_str = "14_3_4_1_6"
 
 Alexnet3.base_dir = "../build_tuner/tuner_results/vgg16_cifar10/"
-Alexnet3.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar10/loss_1/batch15"
-Alexnet3.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar10/loss_2/batch15"
-Alexnet3.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar10/loss_3/batch15"
+Alexnet3.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar10/loss_1/batch17"
+Alexnet3.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar10/loss_2/batch17"
+Alexnet3.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar10/loss_3/batch17"
 
 Alexnet3.tensor_desc_file = "tuner_results/vgg16_cifar10/vgg16_tensors.txt"
 Alexnet3.layer_file = "tuner_results/vgg16_cifar10/vgg16_layers.txt"
@@ -141,11 +141,11 @@ Alexnet4.start_promise_range = 1
 #Alexnet4.skip_layer_str = "0"
 Alexnet4.skip_layer_str = "0_1_2_14_15_17_18_21"
 Alexnet4.base_dir = "../build_tuner/tuner_results/resnet18_cifar10/"
-Alexnet4.result_dir_1 = "../build_tuner/tuner_results/resnet18_cifar10/loss_1/batch15"
-Alexnet4.result_dir_2 = "../build_tuner/tuner_results/resnet18_cifar10/loss_2/batch15"
-Alexnet4.result_dir_3 = "../build_tuner/tuner_results/resnet18_cifar10/loss_3/batch15"
+Alexnet4.result_dir_1 = "../build_tuner/tuner_results/resnet18_cifar10/loss_1/batch17"
+Alexnet4.result_dir_2 = "../build_tuner/tuner_results/resnet18_cifar10/loss_2/batch17"
+Alexnet4.result_dir_3 = "../build_tuner/tuner_results/resnet18_cifar10/loss_3/batch17"
 Alexnet4.tensor_desc_file = "tuner_results/resnet18_cifar10/resnet_tensors.txt"
-Alexnet4.layer_file = "tuner_results/resnet18_cifar10/resnet_layers.txt"
+Alexnet4.layer_file = "tuner_results/resnet18_cifar10/resnet18_layers.txt"
 Alexnet4.cost_file = "../build_tuner/tuner_results/resnet18_cifar10/op_cost.txt"
 
 Alexnet4.loss1_result_file = "tuner_results/resnet18_cifar10/loss_1/promise_tuned_confs/promise_confs.txt"
@@ -174,9 +174,9 @@ Alexnet5.start_promise_range = 1
 #Alexnet5.skip_layer_str = "0"
 Alexnet5.skip_layer_str = "0_1_2_3_4"
 Alexnet5.base_dir = "../build_tuner/tuner_results/vgg16_cifar100/"
-Alexnet5.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar100/loss_1/batch15"
-Alexnet5.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar100/loss_2/batch15"
-Alexnet5.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar100/loss_3/batch15"
+Alexnet5.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar100/loss_1/batch17"
+Alexnet5.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar100/loss_2/batch17"
+Alexnet5.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar100/loss_3/batch17"
 
 Alexnet5.tensor_desc_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_tensors.txt"
 Alexnet5.layer_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_layers.txt"
@@ -206,9 +206,9 @@ Alexnet6.start_promise_range = 1
 Alexnet6.skip_layer_str = "0"
 
 Alexnet6.base_dir = "../build_tuner/tuner_results/lenet_keras/"
-Alexnet6.result_dir_1 = "../build_tuner/tuner_results/lenet_keras/loss_1/batch15"
-Alexnet6.result_dir_2 = "../build_tuner/tuner_results/lenet_keras/loss_2/batch15"
-Alexnet6.result_dir_3 = "../build_tuner/tuner_results/lenet_keras/loss_3/batch15"
+Alexnet6.result_dir_1 = "../build_tuner/tuner_results/lenet_keras/loss_1/batch17"
+Alexnet6.result_dir_2 = "../build_tuner/tuner_results/lenet_keras/loss_2/batch17"
+Alexnet6.result_dir_3 = "../build_tuner/tuner_results/lenet_keras/loss_3/batch17"
 
 Alexnet6.tensor_desc_file = "tuner_results/lenet_keras/lenet_tensors.txt"
 Alexnet6.layer_file = "tuner_results/lenet_keras/lenet_layers.txt"
@@ -239,9 +239,9 @@ Alexnet7.start_promise_range = 1
 #Alexnet7.skip_layer_str = "0"
 Alexnet7.skip_layer_str = "1_14_0_6_2"
 Alexnet7.base_dir = "../build_tuner/tuner_results/mobilenet/"
-Alexnet7.result_dir_1 = "../build_tuner/tuner_results/mobilenet/loss_1/batch15"
-Alexnet7.result_dir_2 = "../build_tuner/tuner_results/mobilenet/loss_2/batch15"
-Alexnet7.result_dir_3 = "../build_tuner/tuner_results/mobilenet/loss_3/batch15"
+Alexnet7.result_dir_1 = "../build_tuner/tuner_results/mobilenet/loss_1/batch17"
+Alexnet7.result_dir_2 = "../build_tuner/tuner_results/mobilenet/loss_2/batch17"
+Alexnet7.result_dir_3 = "../build_tuner/tuner_results/mobilenet/loss_3/batch17"
 
 Alexnet7.tensor_desc_file = "tuner_results/mobilenet/mobilenet_ops.txt"
 Alexnet7.layer_file = "tuner_results/mobilenet/mobilenet_layer_comp.txt"
@@ -271,9 +271,9 @@ Alexnet8.start_promise_range = 1
 #Alexnet8.skip_layer_str = "0"
 Alexnet8.skip_layer_str = "7_0_1"
 Alexnet8.base_dir = "../build_tuner/tuner_results/mobilenet_shallow/"
-Alexnet8.result_dir_1 = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/batch15"
-Alexnet8.result_dir_2 = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/batch15"
-Alexnet8.result_dir_3 = "../build_tuner/tuner_results/mobilenet_shallow/loss_3/batch15"
+Alexnet8.result_dir_1 = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/batch17"
+Alexnet8.result_dir_2 = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/batch17"
+Alexnet8.result_dir_3 = "../build_tuner/tuner_results/mobilenet_shallow/loss_3/batch17"
 
 Alexnet8.tensor_desc_file = "../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_ops.txt"
 Alexnet8.layer_file = "../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_layer_comp.txt"
diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
index 0b59ff187d6387ac0616a3e8854029fbafec9b98..2d4a3bb9ca0189e7889abeca2888f985d1bbe380 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
@@ -52,42 +52,44 @@ def computeLayerSwings():
 
   
 
-
+gpu = 1
   
 def runPromiseTuner():
 
-  start = startProfile("LeNet")  
-  runPromiseBench(bench_tuner_data["lenet_keras"])
-  stopProfile("LeNet", start)
+  if gpu == 2:
+    start = startProfile("LeNet")  
+    runPromiseBench(bench_tuner_data["lenet_keras"])
+    stopProfile("LeNet", start)
    
-  start = startProfile("Alexnet")  
-  runPromiseBench(bench_tuner_data["alexnet_cifar10"])
-  stopProfile("Alexnet", start)
-
-  start = startProfile("Alexnet2")  
-  runPromiseBench(bench_tuner_data["alexnet2_cifar10"])
-  stopProfile("Alexnet2", start)  
-
-  start = startProfile("VGG16_10")  
-  runPromiseBench(bench_tuner_data["vgg16_cifar10"])
-  stopProfile("VGG16_10", start)  
+    start = startProfile("Alexnet")  
+    runPromiseBench(bench_tuner_data["alexnet_cifar10"])
+    stopProfile("Alexnet", start)
 
-  start = startProfile("VGG16_100")  
-  runPromiseBench(bench_tuner_data["vgg16_cifar100"])
-  stopProfile("VGG16_100", start)
+    start = startProfile("Alexnet2")  
+    runPromiseBench(bench_tuner_data["alexnet2_cifar10"])
+    stopProfile("Alexnet2", start)  
 
-  start = startProfile("ResNet")  
-  runPromiseBench(bench_tuner_data["resnet18_cifar10"])
-  stopProfile("ResNet", start)  
+    start = startProfile("ResNet")  
+    runPromiseBench(bench_tuner_data["resnet18_cifar10"])
+    stopProfile("ResNet", start)  
 
-  start = startProfile("MobileNet")  
-  runPromiseBench(bench_tuner_data["mobilenet_cifar10"])
-  stopProfile("MobileNet", start)
+  if gpu == 1:
+    
+    start = startProfile("VGG16_10")  
+    runPromiseBench(bench_tuner_data["vgg16_cifar10"])
+    stopProfile("VGG16_10", start)  
+  
+    start = startProfile("VGG16_100")  
+    runPromiseBench(bench_tuner_data["vgg16_cifar100"])
+    stopProfile("VGG16_100", start)
 
-  start = startProfile("MobileNet-SH")  
-  runPromiseBench(bench_tuner_data["mobilenet_shallow"])
-  stopProfile("MobileNet-SH", start)  
+    start = startProfile("MobileNet")  
+    runPromiseBench(bench_tuner_data["mobilenet_cifar10"])
+    stopProfile("MobileNet", start)
 
+    start = startProfile("MobileNet-SH")  
+    runPromiseBench(bench_tuner_data["mobilenet_shallow"])
+    stopProfile("MobileNet-SH", start)  
 
   #runPSNRPromiseBench("pipeline_GEOM")
   #runPSNRPromiseBench("pipeline_GEMO")
@@ -95,7 +97,7 @@ def runPromiseTuner():
   #runPSNRPromiseBench("pipeline_GSM")
   #runPSNRPromiseBench("pipeline_GSME")
 
-  dumpProfiles("time_profile_11.txt")
+  dumpProfiles("time_profile_17.txt")
   
   
 def runPromiseValidation():
@@ -134,6 +136,7 @@ def runAutotuner():
 
 def runSensAnalysis():
 
+  """
   start = startProfile("LeNet")  
   test_sensitivity3(bench_tuner_data["lenet_keras"])
   stopProfile("LeNet", start)  
@@ -145,7 +148,8 @@ def runSensAnalysis():
   start = startProfile("AlexNet2")  
   test_sensitivity3(bench_tuner_data["alexnet2_cifar10"])
   stopProfile("AlexNet2", start)  
-
+  """
+  
   start = startProfile("ResNet")  
   test_sensitivity3(bench_tuner_data["resnet18_cifar10"])
   stopProfile("ResNet", start)  
@@ -159,6 +163,7 @@ def runSensAnalysis():
   test_sensitivity3(bench_tuner_data["mobilenet_shallow"])
   stopProfile("MobileNet_SH", start)  
 
+  """
   start = startProfile("VGG_10")  
   test_sensitivity3(bench_tuner_data["vgg16_cifar10"])
   stopProfile("VGG16_10", start)  
@@ -166,7 +171,9 @@ def runSensAnalysis():
   start = startProfile("VGG_100")  
   test_sensitivity3(bench_tuner_data["vgg16_cifar100"]) 
   stopProfile("VGG16_100", start)  
-
+  
+  """
+  
   dumpProfiles("sens_time_prof.txt")
 
   
@@ -206,9 +213,9 @@ if __name__ == "__main__":
 
   #computeLayerSwings()
   
-  #runPromiseTuner()    
+  runPromiseTuner()    
 
-  runPromiseValidation()
+  #runPromiseValidation()
 
   #runSensAnalysis()
 
diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py
index ae06613aaf1202d7a901e3365f25906e084bc959..f1a9c8f417bafdf4084a687670074101bec3faa0 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py
@@ -7,7 +7,7 @@ from error_sensitivity import select_skip_layers
 
 def runPromiseTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_runs, skip_layers):
 
-  tuner_cmd = "python  ../opentuner/autotuner/promise_tuner3.py "
+  tuner_cmd = "python2  ../opentuner/autotuner/promise_tuner3.py "
   tuner_cmd += " --test-limit "
   tuner_cmd += str(autotuner_runs)
   tuner_cmd += " --binary ./"
@@ -48,7 +48,7 @@ def runPromiseTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_r
 
 def promiseTunerLoss1(Bench, dir_prefix):
 
-  tuner_runs = int(Bench.autotuner_runs / 3)
+  tuner_runs = Bench.autotuner_runs 
   
   skip_layers1 = "0"
   skip_layers2 = "0_" + select_skip_layers(Bench, 30)
@@ -61,7 +61,7 @@ def promiseTunerLoss1(Bench, dir_prefix):
 
 def promiseTunerLoss2(Bench, dir_prefix):
 
-  tuner_runs = int(Bench.autotuner_runs / 3)
+  tuner_runs = Bench.autotuner_runs 
   
   skip_layers1 = "0"
   skip_layers2 = "0_" + select_skip_layers(Bench, 20)
@@ -75,7 +75,7 @@ def promiseTunerLoss2(Bench, dir_prefix):
   
 def promiseTunerLoss3(Bench, dir_prefix):
 
-  tuner_runs = int(Bench.autotuner_runs / 3)
+  tuner_runs = Bench.autotuner_runs 
   
   skip_layers1 = "0"
   skip_layers2 = "0_" + select_skip_layers(Bench, 10)
@@ -86,6 +86,7 @@ def promiseTunerLoss3(Bench, dir_prefix):
   runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3,  2.5, tuner_runs, skip_layers3)
   
 
+BASELINE = True
 
   
 def runPromiseBench(Bench):
@@ -93,12 +94,21 @@ def runPromiseBench(Bench):
   # NOTE-IMP: Changing current directory to one with promise binaries
   dir_prefix = "../build_tuner/"
   
-   
-  promiseTunerLoss1(Bench, dir_prefix)
 
-  promiseTunerLoss2(Bench, dir_prefix)
+  if BASELINE:
+    tuner_runs = Bench.autotuner_runs * 2
+    skip_layers = "0"
+    runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers)
+    runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers)
+    runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers)
+    
+  else:
+    
+    promiseTunerLoss1(Bench, dir_prefix)
+
+    promiseTunerLoss2(Bench, dir_prefix)
 
-  promiseTunerLoss3(Bench, dir_prefix)
+    promiseTunerLoss3(Bench, dir_prefix)
 
   
   
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
index 80e67a1bc372e6628404e9852c9f7809cbfd73be..b38efa9c82a1da4440fe4653b72b1beb89032a5f 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
@@ -19,8 +19,8 @@ def getAccuracy(file_name):
   return accuracy  
 
 
-total_runs = 60.0
-fails_allowed = 4
+total_runs = 40.0
+fails_allowed = 3
 skip_lines = 0
 
 
@@ -297,6 +297,8 @@ def getConfigCost(layer_costs, config_str):
       continue
 
     orig_cost += layer_costs[it]
+
+    #print ("orig_cost = ", orig_cost, " flag_value = ", flag_value) 
     
     if flag_value == 11:
       total_cost += layer_costs[it]
@@ -307,10 +309,12 @@ def getConfigCost(layer_costs, config_str):
     elif flag_value < 8:
       divisor = 5 + (7 - flag_value)
       total_cost += (layer_costs[it] / divisor)
-      
+ 
     it += 1
-    
-  return total_cost, (orig_cost / total_cost)
+
+  speedup = orig_cost * 1.0 / total_cost * 1.0
+  
+  return total_cost, speedup 
   
 
 
@@ -406,6 +410,7 @@ def dump_promise_confidence_files(binary, result_dir, layer_file_path,
 def dump_promise_confidence_files2(binary, result_dir, layer_file_path,
                                    num_flags, accuracy, layer_costs, confidence):
 
+
   #result_dir = args.result_dir
   output_dir = result_dir + "/high_confidence"
   input_dir = result_dir + "/full_results"
@@ -445,6 +450,49 @@ def dump_promise_confidence_files2(binary, result_dir, layer_file_path,
 
 
 
+
+def dump_promise_confidence_files3(binary, input_dir, output_dir, layer_file_path,
+                                   num_flags, accuracy, layer_costs, confidence):
+
+
+  #result_dir = args.result_dir
+  #output_dir = result_dir + "/high_confidence"
+  #input_dir = result_dir + "/full_results"
+
+  if not os.path.exists(output_dir):
+    os.mkdir(output_dir)    
+
+  layer_sizes = processLayerFile(layer_file_path);
+  print layer_sizes
+  sleep(2)
+    
+  confidence_list = compute_promise_confidence2(binary, accuracy, confidence, layer_costs, input_dir, output_dir)
+  print confidence_list
+
+  # Ascending sort on accuracy
+  sorted_list = sorted(confidence_list, key = lambda tup: tup[1])
+   
+  promise_file = open(output_dir + "/promise_confs.txt", "w+")
+  confidence_file = open(output_dir + "/confidence_summary.txt", "w+")
+
+  max_configs = 50
+  it_count = 0
+  for x in sorted_list:
+    if x[1] > accuracy and x[0] > confidence:
+      config_str = getLayerConfigStr(x[3], layer_sizes, num_flags)
+      promise_file.write(config_str + "\n")
+      it_count += 1
+      if it_count > max_configs:
+        break
+       
+    confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n")    
+    
+  promise_file.close()
+  confidence_file.close()
+  
+  print "Dumped Confidence Summary"
+
+
   
 
 
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fda8f742cc0ef75e4b84232f397872b04554dd6
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py
@@ -0,0 +1,259 @@
+
+
+import os
+import shutil
+from measure_confidence2 import getConfigCost
+
+
+class Config:
+  def __init__(self):
+    self.avg_accuracy = 0
+    self.avg_loss = 0
+    self.speedup = 1
+    self.fname = ""
+    self.flags = []
+
+
+
+
+def skipFile(fname):
+
+  skip_files = {}
+  skip_files["confidence_summary.txt"] = 1
+  skip_files["promise_confs.txt"] = 1
+
+  if "accuracy" in fname:
+    return True
+
+  if fname in skip_files:
+    return True
+  else:
+    return False
+    
+
+  
+    
+def loadConfigData(result_dir, layer_costs, baseline_accuracy):
+
+  config_arr = []
+  
+  #result_dir += "/promise_tuner/high_confidence/"
+  file_names = os.listdir(result_dir)
+
+  
+  for fname in file_names:
+    if not skipFile(fname):
+
+      fpath = result_dir + fname  
+      config = Config()
+      f = open(fpath, "r")
+
+      config_str = f.read()
+      cost, speedup = getConfigCost(layer_costs, config_str)
+
+      config.speedup = speedup
+      config.fname = fname
+
+      fpath2 = fpath + "_accuracy"
+      f2 = open(fpath2, "r")
+      acc_str = f2.read().strip()
+      accuracy = float(acc_str)
+      
+      config.avg_accuracy = accuracy
+      config.avg_loss = baseline_accuracy - accuracy
+   
+      config_arr.append(config)
+        
+
+  return config_arr      
+
+    
+
+AL_THRESHOLD = 0.1
+SPEEDUP_BAND_SIZE = 0.3
+ENERGY_BAND_SIZE = 10
+
+
+class Configuration:
+    def __init__(self, name, speedup, energy, accuracy, accuracy_loss):
+        self.name = name
+        self.speedup = speedup
+        self.energy = energy
+        self.accuracy = accuracy
+        self.accuracy_loss = accuracy_loss
+    def __repr__(self):
+        return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss))
+
+configuration_objects = [
+    Configuration('conf1', 1.05, 15, 85, 1.2),
+    Configuration('conf2', 2.51, 12, 83, 1.4),
+    Configuration('conf3', 2.05, 10, 84, 0.8),
+]
+
+def compute_pareto_points(configurations):
+    speedupconfigurations = []
+    energyconfigurations = []
+    #sort configurations based on speedup
+    sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss)
+
+    start_idx = 0
+    while start_idx < len(sorted_configurations):
+        end_idx = start_idx + 1;
+        # find end_idx
+        while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) :
+            end_idx += 1
+        # find best speedup end energy in this accuracy loss level
+        sp = -1.0
+        sp_idx = 0
+        en = -1.0
+        en_idx = 0
+        for i in range(start_idx, end_idx):
+            if sorted_configurations[i].speedup > sp:
+                sp = sorted_configurations[i].speedup
+                sp_idx = i
+            if sorted_configurations[i].energy > en:
+                en = sorted_configurations[i].energy
+                en_idx = i
+        sp_not_dominated = True
+        # if not empty list of configurations
+        if speedupconfigurations:
+            if speedupconfigurations[-1].speedup >= sp:
+                sp_not_dominated = False
+        en_not_dominated = True
+        # if not empty list of configurations
+        if energyconfigurations:
+            if energyconfigurations[-1].energy >= en:
+                en_not_dominated = False
+        if sp_not_dominated:
+            speedupconfigurations.append(sorted_configurations[sp_idx])
+        if en_not_dominated:
+            energyconfigurations.append(sorted_configurations[en_idx])
+        # outer while loop variable increment
+        start_idx = end_idx
+    return [speedupconfigurations, energyconfigurations]
+
+
+def compute_pareto_points_with_margin(configurations, speedup_band_width, energy_band_width):
+    speedupconfigurations = []
+    energyconfigurations = []
+    #sort configurations based on speedup
+    sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss)
+
+    idx_to_sp_conf_dict = {}
+    idx_to_en_conf_dict = {}
+
+    start_idx = 0
+    while start_idx < len(sorted_configurations):
+        end_idx = start_idx + 1;
+        # find end_idx
+        while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) :
+            end_idx += 1
+        # find best speedup end energy in this accuracy loss level
+        sp = -1.0
+        sp_idx = 0
+        en = -1.0
+        en_idx = 0
+        for i in range(start_idx, end_idx):
+            if sorted_configurations[i].speedup > sp:
+                sp = sorted_configurations[i].speedup
+                sp_idx = i
+            if sorted_configurations[i].energy > en:
+                en = sorted_configurations[i].energy
+                en_idx = i
+        sp_not_dominated = True
+        # if not empty list of configurations
+        if speedupconfigurations:
+            if speedupconfigurations[-1].speedup >= sp:
+                sp_not_dominated = False
+        en_not_dominated = True
+        # if not empty list of configurations
+        if energyconfigurations:
+            if energyconfigurations[-1].energy >= en:
+                en_not_dominated = False
+        if sp_not_dominated:
+            speedupconfigurations.append(sorted_configurations[sp_idx])
+        idx_to_sp_conf_dict[start_idx] = len(speedupconfigurations)-1
+        if en_not_dominated:
+            energyconfigurations.append(sorted_configurations[en_idx])
+        idx_to_en_conf_dict[start_idx] = len(energyconfigurations)-1
+        # outer while loop variable increment
+        start_idx = end_idx
+
+    # We want to add configurations in a band of a certain width around the curves
+    # not possible to do during contruction, because the quality of the curve would
+    # deteriorate quickly
+
+    AdjustedSpeedupCurve = []
+    AdjustedEnergyCurve = []
+
+    start_idx = 0
+    while start_idx < len(sorted_configurations):
+        end_idx = start_idx + 1;
+        # find end_idx
+        while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) :
+            end_idx += 1
+        for i in range(start_idx, end_idx):
+            if sorted_configurations[i].speedup + speedup_band_width >= speedupconfigurations[idx_to_sp_conf_dict[start_idx]].speedup:
+                AdjustedSpeedupCurve.append(sorted_configurations[i])
+            if sorted_configurations[i].energy + energy_band_width >= energyconfigurations[idx_to_en_conf_dict[start_idx]].energy:
+                AdjustedEnergyCurve.append(sorted_configurations[i])
+        # outer while loop variable increment
+        start_idx = end_idx
+
+    return [AdjustedSpeedupCurve, AdjustedEnergyCurve]
+
+
+
+def findParetoConfigs(base_dir, layer_costs, accuracy):
+
+  result_dir = base_dir + "/pareto/"
+  try:
+      os.mkdir(result_dir)
+  except:
+      print "could not create dir"
+
+  input_dir = base_dir + "/full_results/"    
+  #result_dir = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15"
+  config_arr = loadConfigData(input_dir, layer_costs, accuracy)
+
+  config_list = []
+
+  it = 0
+  for config in config_arr:
+    config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss)
+    config_list.append(config)
+
+
+  if len(config_list) < 30:
+    SPEEDUP_BAND_SIZE = 1.2
+    
+    
+  ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE)
+
+  
+  print ("len(config_list) = ", len(config_list))
+  print ("len(ASC) = ", len(ASC))
+
+  #print (ASC)
+  #print (config_list)
+
+  for conf in ASC:
+    #dst_path = conf.name.replace("full_results", "pareto")
+    src_path = base_dir + "/full_results/" + conf.name
+    dst_path = base_dir + "/pareto/" + conf.name
+    shutil.copy(src_path, dst_path)
+    
+  
+
+if __name__ == "__main__":
+
+  findParetoConfigs("")
+  
+  #SC, EC = compute_pareto_points(configuration_objects)
+  #ASC, AEC = compute_pareto_points_with_margin(configuration_objects, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE)
+
+  #print(SC)
+  #print(EC)
+
+  #print(ASC)
+  #print(AEC)
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
index 07a5bf0bcf4b9135a746f0dd733daa2699d7ad58..87ed35bbc4bcac6288c30454ba1d650956dd9118 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
@@ -21,9 +21,10 @@ import subprocess
 import threading
 import psutil
 
-from measure_confidence2 import dump_promise_confidence_files2
+from measure_confidence2 import dump_promise_confidence_files3
 from select_top_results import select_top_results
 from time import sleep
+from pareto_curve import findParetoConfigs
 
 
 layer_file = ""
@@ -48,6 +49,7 @@ def readCostFile(file_path):
     cost = float(x.strip())
     layer_costs.append(cost)
 
+  print ("len(layer_costs) = ", layer_costs)
   f.close()
   
   
@@ -192,7 +194,11 @@ class ClangFlagsTuner(MeasurementInterface):
       evaluated_configs[accuracy] = 1
       shutil.copy('promise_flags', output_dir + '/' + binary_name + '_' + str(test_id))
 
-        
+      f_acc = open(output_dir + '/' + binary_name + '_' + str(test_id) + "_accuracy", "w")
+      f_acc.write(str(accuracy))
+      f_acc.close()
+                   
+      
     print "done with one run"
 
     test_id += 1
@@ -203,12 +209,17 @@ class ClangFlagsTuner(MeasurementInterface):
   def save_final_config(self, configuration):
 
     print "Dumping High Confidence results \n"
-    sleep(20)
+    sleep(2)
+
+
+    findParetoConfigs(orig_result_dir, layer_costs, accuracy_threshold)
+
+    input_dir = orig_result_dir + "/pareto/"
+    output_dir = orig_result_dir + "/high_confidence/"
     
     # Only dumping files with 95% confidence
-    dump_promise_confidence_files2(binary_name, orig_result_dir, layer_file, num_flags, accuracy_threshold, layer_costs, 95)
+    dump_promise_confidence_files3(binary_name, input_dir, output_dir, layer_file, num_flags, accuracy_threshold, layer_costs, 95)
     #select_top_results(orig_result_dir + "/high_confidence")
-
   
     
     """
diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py
index b685170da40c17dc45b7258cbbe2166ac52736fc..1df46eec8fc34cee7c6a7683d1faaae4a94639ca 100644
--- a/llvm/projects/soc_simulator/src/driver.py
+++ b/llvm/projects/soc_simulator/src/driver.py
@@ -3,291 +3,297 @@ import os
 import subprocess
 import sys
 
-def build_nested_default_dict():
-	return defaultdict(build_nested_default_dict)
+class Driver:
+    fp16_swing = 8
 
-def is_conv(operation_name):
-    return operation_name.startswith("Conv")
+    class ApproxTypes:
+        FP16 = 0
+        FP32 = 1
+        PROMISE = 2
 
-def is_nml(operation_name):
-    return operation_name.startswith("NML")
+    results_time_key = "Time"
+    results_energy_key = "Energy"
 
-def is_fc(operation_name):
-    return operation_name.startswith("FC")
 
-# NOTE: Use an OrderedDict if we want to search by operation name 
-# Using a list bc we care about the order the data is read in
-# since it corresponds to the data in the configuration file
-tensor_layers = []
-def parse_tensor_layer_file(layer_filename): 
-    if not os.path.isfile(layer_filename):
-        print("ERROR: %s was not found." % layer_filename)
-        exit(1)
+    def driver(self):
+        self.__parse_tensor_layer_file()
+        self.__parse_tensor_table()
+        self.__run_simulations()
+        self.__display_results()
 
-    layer_file = open(layer_filename, "r")
-    for line in layer_file:
-        layer_data = line.strip().split(',')
-        layer_name = layer_data[0]
-
-        tensor_layer = defaultdict(str)
-        tensor_layer["Name"] = layer_name
-
-        if is_conv(layer_name):
-            tensor_layer["N"] = float(layer_data[1])
-            tensor_layer["Cin"] = float(layer_data[2])
-            tensor_layer["H"] = float(layer_data[3])
-            tensor_layer["W"] = float(layer_data[4])
-            tensor_layer["Cout"] = float(layer_data[5])
-            tensor_layer["Kh"] = float(layer_data[7])
-            tensor_layer["Kw"] = float(layer_data[8])
-            tensor_layer["Sh"] = float(layer_data[9])
-            tensor_layer["Sw"] = float(layer_data[10])
-
-        elif is_fc(layer_name):
-            tensor_layer["RA"] = float(layer_data[1])
-            tensor_layer["CA"] = float(layer_data[2])
-            tensor_layer["RB"] = float(layer_data[3])
-            tensor_layer["CB"] = float(layer_data[4])
-
-        elif not is_nml(layer_name): # TODO should we store data for NMLs?
-			print("ERROR: Invalid layer name %s" % layer_name)
-			exit(1)
-
-        tensor_layers.append(tensor_layer)
-    layer_file.close()
-
-# [layer_name][operation_name][cols] 
-# Operation names need to be stored in order of insertion 
-tensor_table = defaultdict(lambda: list(defaultdict(str)))
-
-def parse_tensor_table(table_filename): 
-    if not os.path.isfile(table_filename):
-        print("ERROR: %s was not found." % table_filename)
-        exit(1)
-    table_file = open(table_filename, "r")
-    line = table_file.readline().strip()
 
-    while line:
-        # Line here MUST be a header or there's a bug 
-        # Get the description of the layer 
-        assert(line.startswith("**"))
+    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
+        self.__layer_filename = layer_filename
+        self.__table_filename = table_filename
+        self.__config_filename = config_filename
+        self.__results_filename = results_filename
 
-        header_contents = line.split(' ')[1:] 
-        layer_name = header_contents[0]
-        num_ops = int(header_contents[1])
-        col_names = header_contents[2:]
+        # NOTE: Use an OrderedDict if we want to search by operation name 
+        # Using a list bc we care about the order the data is read in
+        # since it corresponds to the data in the configuration file
+        self.__tensor_layers = []
 
-        layer_operations = []
+        # [layer_name][operation_name][cols] 
+        # Operation names need to be stored in order of insertion 
+        self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
 
-        # Go through all operations in the layer
-        for op_count in range(num_ops):
-            operation_data = defaultdict(str)
+        # [Time/Energy][number corresponding to order the layer config was read in] = time/energy
+        self.__aggregate_results = defaultdict(lambda: defaultdict(float))
+        self.__config_count = 0
 
-            line = table_file.readline().strip()
-            op_data = line.split(' ')
-            op_name = op_data[0]
-            operation_data["Name"] = op_name
 
-            # Number of data items (#s) needs to match up with the # of cols 
-            assert(len(op_data) - 1 == len(col_names)) 
+    @staticmethod
+    def is_conv(operation_name):
+        return operation_name.startswith("Conv")
+
+
+    @staticmethod
+    def is_nml(operation_name):
+        return operation_name.startswith("NML")
+
+
+    @staticmethod
+    def is_fc(operation_name):
+        return operation_name.startswith("FC")
+
+
+    def __parse_tensor_layer_file(self): 
+        if not os.path.isfile(self.__layer_filename):
+            print("ERROR: %s was not found." % self.__layer_filename)
+            exit(1)
+
+        layer_file = open(self.__layer_filename, "r")
+        for line in layer_file:
+            layer_data = line.strip().split(',')
+            layer_name = layer_data[0]
+
+            tensor_layer = defaultdict(str)
+            tensor_layer["Name"] = layer_name
+
+            if Driver.is_conv(layer_name):
+                tensor_layer["N"] = float(layer_data[1])
+                tensor_layer["Cin"] = float(layer_data[2])
+                tensor_layer["H"] = float(layer_data[3])
+                tensor_layer["W"] = float(layer_data[4])
+                tensor_layer["Cout"] = float(layer_data[5])
+                tensor_layer["Kh"] = float(layer_data[7])
+                tensor_layer["Kw"] = float(layer_data[8])
+                tensor_layer["Sh"] = float(layer_data[9])
+                tensor_layer["Sw"] = float(layer_data[10])
+
+            elif Driver.is_fc(layer_name):
+                tensor_layer["RA"] = float(layer_data[1])
+                tensor_layer["CA"] = float(layer_data[2])
+                tensor_layer["RB"] = float(layer_data[3])
+                tensor_layer["CB"] = float(layer_data[4])
 
-            # Go through all data items (each col element) per operation 
-            for i in range(len(col_names)):
-                operation_data[col_names[i]] = float(op_data[i + 1])
+            elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs?
+                print("ERROR: Invalid layer name %s" % layer_name)
+                exit(1)
 
-            layer_operations.append(operation_data)
+            self.__tensor_layers.append(tensor_layer)
+        layer_file.close()
 
-        tensor_table[layer_name] = layer_operations
+
+    def __parse_tensor_table(self): 
+        if not os.path.isfile(self.__table_filename):
+            print("ERROR: %s was not found." % self.__table_filename)
+            exit(1)
+        table_file = open(self.__table_filename, "r")
         line = table_file.readline().strip()
-    table_file.close()
-   
-fp16_swing = 8
-
-class ApproxTypes:
-    FP16 = 0
-    FP32 = 1
-    PROMISE = 2
-
-def get_approx_type(approx_type):
-    if approx_type == 0:
-        return "fp16"
-    elif approx_type == 1:
-        return "fp32"
-    return "promise"
-
-def is_promise(config_layer):
-    # TODO overhead in call to split?
-    return float(config_layer.split(' ')[0]) < fp16_swing
-
-
-def quantize(curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
-    #print(get_approx_type(curr_layer), get_approx_type(prev_layer))
-    if curr_layer == prev_layer or curr_layer == ApproxTypes.PROMISE \
-                or prev_layer == ApproxTypes.PROMISE: # No quantization needed
-        return 0.0, 0.0
-   
-    layer_name = layer_data["Name"]
-
-	# NOTE: Ignoring logic where curr == promise or prev == promise bc 
-	# smartDMA is always true so we'd return near the beginning of the method
-
-    # Get h2f/f2h data using the first tensor operation in the layer
-    # (which is why order matters in the tensor table)
-    tensor_op_row = tensor_table[layer_name][h2f_f2h_operation_ind]  
-    if curr_layer == ApproxTypes.FP32:
-        time = tensor_op_row["h2f_time"]
-        energy = tensor_op_row["h2f_energy"]
-    elif curr_layer == ApproxTypes.FP16:
-        time = tensor_op_row["f2h_time"]
-        energy = tensor_op_row["f2h_energy"]
-
-    print("Quantization: (%f, %f)" % (time, energy))
-    return (time, energy)
-
-def run_promise_simulation(swing, layer_data):
-    layer_name = layer_data["Name"] 
-    patch_factor = 1 
-
-    if is_conv(layer_name): 
-        rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
-				/ (layer_data["Sh"] * layer_data["Sw"])
-        cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
-        rows_b = cols_a
-        cols_b = layer_data["Cout"]
-        patch_factor = layer_data["Kh"] * layer_data["Kw"]
-    elif is_fc(layer_name):
-        rows_a = layer_data["RA"] 
-        cols_a = layer_data["CA"]
-        rows_b = cols_a
-        cols_b = layer_data["CB"]
-    else:
-        print("PROMISE can't run whatever this layer is.")
-        exit(1)
-    #print("[%f x %f] x [%f x %f] : %f" % (rows_a, cols_a, rows_b, cols_b, swing)) 
-	# Run promise simulator
-	# TODO need to print time and energy in the ptm runner so we can pipe it
-    output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \
-                str(cols_b), str(patch_factor), str(swing)], \
-                stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
-    total_time_energy = output.strip().split(',')
-
-    assert(len(total_time_energy) == 2)
-    print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
-    return float(total_time_energy[0]), float(total_time_energy[1])
-
-
-def run_gpu_simulation(curr_layer, layer_name, tensor_ind):
-    tensor_info = tensor_table[layer_name][tensor_ind]
-    if curr_layer == ApproxTypes.FP32:
-        conversion_time = tensor_info["fp32_time"]
-        conversion_energy = tensor_info["fp32_energy"]
-    else:
-        conversion_time = tensor_info["fp16_time"]
-        conversion_energy = tensor_info["fp16_energy"]
-    print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
-    return (conversion_time, conversion_energy)
-
-# Default dict of default dicts 
-results_time_key = "Time"
-results_energy_key = "Energy"
-# [Time/Energy][number corresponding to order the layer config was read in] = time/energy
-aggregate_results = defaultdict(lambda: defaultdict(float))
-config_count = 0
-
-def run_simulations(config_filename):
-    global config_count
-
-    if not os.path.isfile(config_filename):
-        print("ERROR: %s was not found" % config_filename)
-        exit(1)
 
-    config_file = open(config_filename, "r")
-  
-    # each line = indepedent configuration
-    # layers are separated by commas
-    # tensor ops are separated by spaces
-    for config in config_file:
-        config_layers = config.strip().split(',')
-        prev_layer = ApproxTypes.FP32
-        curr_layer = None
-
-        for layer_ind, config_layer in enumerate(config_layers): # level
-            layer_data = tensor_layers[layer_ind]  # layer
-            layer_name = layer_data["Name"]
-
-            if is_promise(config_layer):
-                print("Running layer %s on PROMISE" % layer_name)
-                curr_layer = ApproxTypes.PROMISE
-                quant_time, quant_energy = quantize(curr_layer, prev_layer, 0, layer_data)
-                # Compute 
-                time, energy = run_promise_simulation(config_layer, layer_data)
-                print(time, energy)
-                aggregate_results[results_time_key][config_count] += time
-                aggregate_results[results_energy_key][config_count] += energy 
-            else:
-                print("Running layer %s on the GPU" % layer_name)
-                tensor_ops = config_layer.split(' ')
-
-                total_time = 0
-                total_energy = 0
-                for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevle
-                    tensor_op = int(tensor_op)
-                    if tensor_op == fp16_swing:
-                        curr_layer = ApproxTypes.FP16
-                    else:
-                        curr_layer = ApproxTypes.FP32
-                    quant_time, quant_energy = quantize(curr_layer, prev_layer, tensor_ind, \
-                                layer_data)
-                    conv_time, conv_energy = run_gpu_simulation(curr_layer, layer_name, tensor_ind)
-                    total_time += quant_time + conv_time 
-                    total_energy += quant_energy + conv_energy
-
-                aggregate_results[results_time_key][config_count] += total_time
-                aggregate_results[results_energy_key][config_count] += total_energy 
-
-            prev_layer = curr_layer
-        config_count += 1
-        print("\n")
-    config_file.close()
-
-
-def display_results(results_filename):
-    results_file = open(results_filename, "w")
-    attributes_to_print = [results_time_key, results_energy_key]
-
-    for attribute in attributes_to_print:
-        results_file.write("%s\n" % attribute)
-        results_file.write("Configuration,Total,Improvement\n") 
-
-        baseline_val = aggregate_results[attribute][0]
-        print(baseline_val)
-        best_config = None
-        best_result = None
-
-        for config_ind in range(config_count):
-            results_file.write("c%d" % config_ind)
-            time_or_energy_val = aggregate_results[attribute][config_ind]
-            results_file.write(",%f" % time_or_energy_val)
-            results_file.write(",%f\n" % (baseline_val / (time_or_energy_val + 0.0001)))
-
-            if not best_result or time_or_energy_val < best_result:
-                best_result = time_or_energy_val
-                best_config = config_ind
-        results_file.write("\nc%d,%f\n\n" % (best_config, aggregate_results[attribute][best_config]))
-    results_file.close()
-
-def driver(tensor_layers_file, tensor_table_file, conf_file, output_file):
-    print(tensor_layers_file, tensor_table_file, conf_file, output_file)
-    parse_tensor_layer_file(tensor_layers_file)
-    parse_tensor_table(tensor_table_file)
-    run_simulations(conf_file)
-    display_results(output_file)
+        while line:
+            # Line here MUST be a header or there's a bug 
+            # Get the description of the layer 
+            assert(line.startswith("**"))
+
+            header_contents = line.split(' ')[1:] 
+            layer_name = header_contents[0]
+            num_ops = int(header_contents[1])
+            col_names = header_contents[2:]
+
+            layer_operations = []
+
+            # Go through all operations in the layer
+            for op_count in range(num_ops):
+                operation_data = defaultdict(str)
+
+                line = table_file.readline().strip()
+                op_data = line.split(' ')
+                op_name = op_data[0]
+                operation_data["Name"] = op_name
+
+                # Number of data items (#s) needs to match up with the # of cols 
+                assert(len(op_data) - 1 == len(col_names)) 
+
+                # Go through all data items (each col element) per operation 
+                for i in range(len(col_names)):
+                    operation_data[col_names[i]] = float(op_data[i + 1])
+
+                layer_operations.append(operation_data)
+
+            self.__tensor_table[layer_name] = layer_operations
+            line = table_file.readline().strip()
+        table_file.close()
+
+
+    @staticmethod
+    def is_promise(config_layer):
+        return float(config_layer.split(' ')[0]) < Driver.fp16_swing
+
+
+    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
+        if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \
+                    or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed
+            return 0.0, 0.0
+       
+        layer_name = layer_data["Name"]
+
+        # NOTE: Ignoring logic where curr == promise or prev == promise bc 
+        # smartDMA is always true so we'd return near the beginning of the method
+
+        # Get h2f/f2h data using the first tensor operation in the layer
+        # (which is why order matters in the tensor table)
+        tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]  
+        if curr_layer == Driver.ApproxTypes.FP32:
+            time = tensor_op_row["h2f_time"]
+            energy = tensor_op_row["h2f_energy"]
+        elif curr_layer == Driver.ApproxTypes.FP16:
+            time = tensor_op_row["f2h_time"]
+            energy = tensor_op_row["f2h_energy"]
+
+        print("Quantization: (%f, %f)" % (time, energy))
+        return (time, energy)
+
+
+    def __run_promise_simulation(self, swing, layer_data):
+        layer_name = layer_data["Name"] 
+        patch_factor = 1 
+
+        if Driver.is_conv(layer_name): 
+            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
+                    / (layer_data["Sh"] * layer_data["Sw"])
+            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
+            rows_b = cols_a
+            cols_b = layer_data["Cout"]
+            patch_factor = layer_data["Kh"] * layer_data["Kw"]
+        elif Driver.is_fc(layer_name):
+            rows_a = layer_data["RA"] 
+            cols_a = layer_data["CA"]
+            rows_b = cols_a
+            cols_b = layer_data["CB"]
+        else:
+            print("PROMISE can't run whatever this layer is.")
+            exit(1)
+        # Run promise simulator
+        # TODO need to print time and energy in the ptm runner so we can pipe it
+        output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \
+                    str(cols_b), str(patch_factor), str(swing)], \
+                    stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
+        total_time_energy = output.strip().split(',')
+
+        assert(len(total_time_energy) == 2)
+        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
+        return float(total_time_energy[0]), float(total_time_energy[1])
+
+
+    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind):
+        tensor_info = self.__tensor_table[layer_name][tensor_ind]
+        if curr_layer == Driver.ApproxTypes.FP32:
+            conversion_time = tensor_info["fp32_time"]
+            conversion_energy = tensor_info["fp32_energy"]
+        else:
+            conversion_time = tensor_info["fp16_time"]
+            conversion_energy = tensor_info["fp16_energy"]
+        print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
+        return (conversion_time, conversion_energy)
+
+
+    def __run_simulations(self):
+        print("run sim")
+        if not os.path.isfile(self.__config_filename):
+            print("ERROR: %s was not found" % self.__config_filename)
+            exit(1)
+
+        config_file = open(self.__config_filename, "r")
+      
+        # each line = independent configuration
+        # layers are separated by commas
+        # tensor ops are separated by spaces
+        for config in config_file:
+            config_layers = config.strip().split(',')
+            prev_layer = Driver.ApproxTypes.FP32
+            curr_layer = None
+
+            for layer_ind, config_layer in enumerate(config_layers): # level
+                layer_data = self.__tensor_layers[layer_ind]  # layer
+                layer_name = layer_data["Name"]
+
+                if Driver.is_promise(config_layer):
+                    print("Running layer %s on PROMISE" % layer_name)
+                    curr_layer = Driver.ApproxTypes.PROMISE
+                    quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data)
+                    # Compute 
+                    time, energy = self.__run_promise_simulation(config_layer, layer_data)
+                    print(time, energy)
+                    self.__aggregate_results[Driver.results_time_key][self.__config_count] += time
+                    self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy 
+                else:
+                    print("Running layer %s on the GPU" % layer_name)
+                    tensor_ops = config_layer.split(' ')
+
+                    total_time = 0
+                    total_energy = 0
+                    for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevel
+                        tensor_op = int(tensor_op)
+                        if tensor_op == Driver.fp16_swing:
+                            curr_layer = Driver.ApproxTypes.FP16
+                        else:
+                            curr_layer = Driver.ApproxTypes.FP32
+                        quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data)
+                        conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind)
+                        total_time += quant_time + conv_time 
+                        total_energy += quant_energy + conv_energy
+
+                    self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time
+                    self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy 
+
+                prev_layer = curr_layer
+            self.__config_count += 1
+            print("\n")
+        config_file.close()
+
+
+    def __display_results(self):
+        results_file = open(self.__results_filename, "w")
+        attributes_to_print = [Driver.results_time_key, Driver.results_energy_key]
+
+        for attribute in attributes_to_print:
+            results_file.write("%s\n" % attribute)
+            results_file.write("Configuration,Total,Improvement\n") 
+
+            baseline_val = self.__aggregate_results[attribute][0]
+            print(baseline_val)
+            best_config = None
+            best_result = None
+
+            for config_ind in range(self.__config_count):
+                results_file.write("c%d" % config_ind)
+                time_or_energy_val = self.__aggregate_results[attribute][config_ind]
+
+                # Using repr to keep all decimal digits when writing to file
+                results_file.write(",%s" % repr(time_or_energy_val))
+                results_file.write(",%s\n" % repr(baseline_val / (time_or_energy_val + 0.0001)))
+
+                if not best_result or time_or_energy_val < best_result:
+                    best_result = time_or_energy_val
+                    best_config = config_ind
+            results_file.write("\nc%d,%s\n\n" % (best_config, repr(self.__aggregate_results[attribute][best_config])))
+        results_file.close()
+
 
 if __name__ == "__main__":
     if len(sys.argv) != 5:
         print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
         exit(1)
-    test_layers_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_layers.txt"
-    test_table_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet_results/mobilenet_tensors.txt"
-    test_conf_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_conf2.txt"
-    driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()