diff --git a/llvm/projects/hpvm-tensor-rt/README.md b/llvm/projects/hpvm-tensor-rt/README.md
index c243c6dd1662083021a642b9a088fa55f5d1ed3d..5b3e5f99d39cf5c697051fa2580eb74f207bb031 100644
--- a/llvm/projects/hpvm-tensor-rt/README.md
+++ b/llvm/projects/hpvm-tensor-rt/README.md
@@ -6,10 +6,25 @@
 * CUDA-9.0 or above
 * CUBLAS-9.0 or above - often included with cuda-toolkit
 
+## Dependent Library Builds
+
+```shell
+cd ../gpu_profiler
+mkdir lib && cd lib
+cmake ../
+make
+
+cd ../../soc_simulator
+mkdir lib && cd lib
+cmake ../
+make
+```
+
+
 ## BUILD
 
 ```shell
-source bin/setup_runtime_paths.sh
+source bin/setup_cuda_llvm_paths.sh
 mkdir build
 cd build
 cmake ../
diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_cuda_llvm_paths.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_cuda_llvm_paths.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3548f182f198724600aee855b66169a1bdf12a3a
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/setup_cuda_llvm_paths.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Source this file (do not execute it) so the exports reach the calling shell.
+# CUDA toolkit / cuDNN path setup; every path below points at the 9.1 install
+module load cuda-toolkit/9.1
+export CUDA_INCLUDE_PATH=/software/cuda-9.1/include
+export CUDNN_PATH=/software/cuda-9.1/lib64/
+export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH
+export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH
+
+# HPVM path setup -- NOTE: hard-coded to one user's home directory; edit before use
+export CPATH=$CPATH:/home/hsharif3/anaconda2/include/
+export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH
+export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/
+export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/
diff --git a/llvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh b/llvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh
index 2ad4d5bc765e23841b87ebdcb778295456c2b380..b288ccfe43c577f9ad14c4eb16284539ae5682ea 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh
+++ b/llvm/projects/hpvm-tensor-rt/bin/setup_jetson.sh
@@ -1,3 +1,8 @@
 
 export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/
+export CUDNN_PATH=/usr/local/cuda-9.0/
+export CUDA_INCLUDE_PATH=${CUDNN_PATH}/include
+
+export LLVM_BUILD_ROOT=/home/nvidia/Gitlab/hpvm/build/
+export LLVM_SRC_ROOT=/home/nvidia/Gitlab/hpvm/llvm/
diff --git a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
index 2c57eaf5be7c09a05859221535a7aff709330fcf..e3b94082f5be7b83a1598625afd5ef05a0472506 100644
--- a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
+++ b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py
@@ -65,7 +65,7 @@ class TableGenerator:
         3. Writes the internal table to <network_name>_tensors.txt file and uses the 
         <network_name>_ops.txt file as a guideline in terms of row order 
         '''
-        #self.__run_inputted_binaries()
+        self.__run_inputted_binaries()
         self.__build_internal_table()
         self.__output_table_to_file()
 
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
index b38efa9c82a1da4440fe4653b72b1beb89032a5f..9ff74128f4e3a21545c9b7658638d4e44b758cbd 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py
@@ -83,7 +83,23 @@ def getConfidence(accuracy_outfile, acc_threshold):
   
   return conf, avg_acc
 
-  
+
+
+def getMinAccuracy(accuracy_outfile):
+  """Return the lowest accuracy recorded in `accuracy_outfile`.
+
+  The file is read as one floating-point accuracy per line; blank lines
+  are ignored.
+
+  Raises ValueError if a line is not a number or the file has no values.
+  """
+  # `with` closes the handle even if a line fails to parse
+  with open(accuracy_outfile, "r") as f:
+    acc_list = [float(x) for x in f if x.strip()]
+  if not acc_list:
+    raise ValueError("no accuracy values found in %s" % accuracy_outfile)
+  return min(acc_list)
+
   
 # NOTE: invokes the binary with the number of runs
 def do_multiple_runs2(binary_name, accuracy_threshold, confidence_threshold):
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py
index 0fda8f742cc0ef75e4b84232f397872b04554dd6..db8233994b855317095c94331fba869d9ad79d16 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py
@@ -5,6 +5,9 @@ import shutil
 from measure_confidence2 import getConfigCost
 
 
+AL_THRESHOLD = 0.1
+  
+
 class Config:
   def __init__(self):
     self.avg_accuracy = 0
@@ -69,10 +72,6 @@ def loadConfigData(result_dir, layer_costs, baseline_accuracy):
 
     
 
-AL_THRESHOLD = 0.1
-SPEEDUP_BAND_SIZE = 0.3
-ENERGY_BAND_SIZE = 10
-
 
 class Configuration:
     def __init__(self, name, speedup, energy, accuracy, accuracy_loss):
@@ -223,11 +222,17 @@ def findParetoConfigs(base_dir, layer_costs, accuracy):
     config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss)
     config_list.append(config)
 
+  
+  SPEEDUP_BAND_SIZE = 1.0
+  ENERGY_BAND_SIZE = 10
 
-  if len(config_list) < 30:
-    SPEEDUP_BAND_SIZE = 1.2
-    
+  # No Pareto Selection if list is < 50 configurations
+  if len(config_list) < 50:
+    SPEEDUP_BAND_SIZE = 100 # Include all in Pareto Frontier
     
+
+  print ("*SPEEDUP_BAND_SIZE = ", SPEEDUP_BAND_SIZE)
+  
   ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE)
 
   
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
index 87ed35bbc4bcac6288c30454ba1d650956dd9118..04ce0d6158819d5cb014411456e1a985fb17b354 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py
@@ -22,6 +22,7 @@ import threading
 import psutil
 
 from measure_confidence2 import dump_promise_confidence_files3
+from measure_confidence2 import getConfidence, getMinAccuracy
 from select_top_results import select_top_results
 from time import sleep
 from pareto_curve import findParetoConfigs
@@ -169,25 +170,36 @@ class ClangFlagsTuner(MeasurementInterface):
     createFlagsFile("promise_flags", cfg)
     
     run_cmd = binary_name
-    print "binary_name = ", run_cmd
+    print "\nbinary_name = ", run_cmd
     #run_result_call_program = self.call_program(run_cmd)
-    #print "returned \n\n"
 
+
+    total_runs = 2
     FNULL = open(os.devnull, 'wb')
-    p = subprocess.Popen(run_cmd, stdout = FNULL)
+    #p = subprocess.Popen(run_cmd, stdout = FNULL)
+    p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL)
     p.wait()
 
        
     accuracy = getAccuracy("final_accuracy")
+
+    # Get Confidence for multiple runs
+    conf, avg_acc = getConfidence("run_accuracies.txt", accuracy_threshold)  
+    
     # getConfigCost returns the cost associated with the selected configuration
     total_comps = getConfigCost(cfg)
    
     
     Result = opentuner.resultsdb.models.Result()
     Result.time = total_comps
-    Result.accuracy = accuracy
-
-    if accuracy > accuracy_threshold:
+    #Result.accuracy = accuracy
+    min_accuracy = getMinAccuracy("run_accuracies.txt")
+    print ("min_accuracy = ", min_accuracy)
+    Result.accuracy = min_accuracy
+    
+    # Only pass conf if conf == 100
+    if min_accuracy > accuracy_threshold and conf == 100:
+      print ("conf = ", conf, " avg_acc = ", avg_acc)
       #if accuracy not in evaluated_configs:
       config_tuple = (total_comps, accuracy, cfg)
       self.configs_list.append(config_tuple)
@@ -199,8 +211,6 @@ class ClangFlagsTuner(MeasurementInterface):
       f_acc.close()
                    
       
-    print "done with one run"
-
     test_id += 1
     
     return Result
diff --git a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
index 1b4dd03b25352290198178fba7bd35590d5fe0cc..e7784cb18e7e7c766f0ae27e6588d4851a2f2812 100644
--- a/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
+++ b/llvm/projects/hpvm-tensor-rt/tensor_runtime/include/tensor_signatures.cc
@@ -7,6 +7,9 @@ void dummyFunction(){
   void* initRT = (void*) &llvm_hpvm_initTensorRt;
   void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt;
 
+  void* initApproxRT = (void*) &llvm_hpvm_initApproxhpvmRt;
+  void* cleanApproxRT = (void*) &llvm_hpvm_cleanupApproxhpvmRt;
+
   void* initRTController = (void*) &llvm_hpvm_initializeRuntimeController;
   void* cleanRTController = (void*) &llvm_hpvm_clearRuntimeController;
   
diff --git a/llvm/projects/soc_simulator/src/driver_new_config.py b/llvm/projects/soc_simulator/src/driver_new_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..115237dac51c96b47d02c84a603d98bdcf0b84a4
--- /dev/null
+++ b/llvm/projects/soc_simulator/src/driver_new_config.py
@@ -0,0 +1,328 @@
+from collections import defaultdict
+import os
+import subprocess
+import sys
+
+class Driver:
+    """Accumulate time and energy totals for each configuration in a config file."""
+    fp16_swing = 8  # is_promise() treats a leading config value below this as PROMISE
+
+    class ApproxTypes:  # integer tags for how a layer / tensor op is run
+        FP16 = 0
+        FP32 = 1
+        PROMISE = 2
+        PERF = 3
+
+    results_time_key = "Time"  # results-file section header and aggregate key
+    results_energy_key = "Energy"  # results-file section header and aggregate key
+
+    def driver(self):
+        """Parse both input files, simulate every configuration, write the results."""
+        self.__parse_tensor_layer_file()
+        self.__parse_tensor_table()
+        self.__run_simulations()
+        self.__display_results()
+
+    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
+        """Store the input/output paths and create empty result containers.
+
+        No file is touched here; call driver() to run the whole pipeline.
+        """
+        self.__layer_filename = layer_filename
+        self.__table_filename = table_filename
+        self.__config_filename = config_filename
+        self.__results_filename = results_filename
+
+        # Layers in file order; a configuration refers to layers by position
+        self.__tensor_layers = []
+        # layer name -> ordered list of per-operation dicts (column -> value)
+        self.__tensor_table = defaultdict(list)
+
+        # [Time/Energy][configuration index] -> accumulated total
+        self.__aggregate_results = defaultdict(lambda: defaultdict(float))
+        self.__config_count = 0
+
+
+    @staticmethod
+    def is_conv(operation_name):
+        """Return True if `operation_name` starts with "Conv"."""
+        return operation_name.startswith("Conv")
+
+    @staticmethod
+    def is_nml(operation_name):
+        """Return True if `operation_name` starts with "NML"."""
+        return operation_name.startswith("NML")
+
+    @staticmethod
+    def is_fc(operation_name):
+        """Return True if `operation_name` starts with "FC"."""
+        return operation_name.startswith("FC")
+
+    def __parse_tensor_layer_file(self):
+        """Read the comma-separated layer file into self.__tensor_layers.
+
+        Conv*/FC* rows keep their dimensions as floats, NML* rows only the name;
+        order follows the file. Exits with status 1 on a missing file or an
+        unrecognized layer name.
+        """
+        if not os.path.isfile(self.__layer_filename):
+            print("ERROR: %s was not found." % self.__layer_filename)
+            sys.exit(1)
+
+        # field name -> column index; column 6 of a Conv row is not read here
+        conv_cols = (("N", 1), ("Cin", 2), ("H", 3), ("W", 4), ("Cout", 5),
+                     ("Kh", 7), ("Kw", 8), ("Sh", 9), ("Sw", 10))
+        fc_cols = (("RA", 1), ("CA", 2), ("RB", 3), ("CB", 4))
+
+        with open(self.__layer_filename, "r") as layer_file:
+            for line in layer_file:
+                if not line.strip():
+                    continue  # tolerate blank / trailing empty lines
+                layer_data = line.strip().split(',')
+                layer_name = layer_data[0]
+                tensor_layer = defaultdict(str)
+                tensor_layer["Name"] = layer_name
+                if Driver.is_conv(layer_name):
+                    cols = conv_cols
+                elif Driver.is_fc(layer_name):
+                    cols = fc_cols
+                elif Driver.is_nml(layer_name):  # TODO should we store data for NMLs?
+                    cols = ()
+                else:
+                    print("ERROR: Invalid layer name %s" % layer_name)
+                    sys.exit(1)
+                for key, col in cols:
+                    tensor_layer[key] = float(layer_data[col])
+                self.__tensor_layers.append(tensor_layer)
+
+
+    def __parse_tensor_table(self):
+        """Read the tensor cost table into self.__tensor_table.
+
+        The file is a sequence of sections. A section starts with a header
+        "** <layer> <num_ops> <col names...>" followed by <num_ops> rows of
+        "<op name> <one number per column>". Operations are kept in file
+        order because later lookups index them by position.
+
+        Exits with status 1 if the file is missing; raises ValueError on a
+        malformed header or row.
+        """
+        if not os.path.isfile(self.__table_filename):
+            print("ERROR: %s was not found." % self.__table_filename)
+            sys.exit(1)
+
+        with open(self.__table_filename, "r") as table_file:
+            line = table_file.readline().strip()
+            while line:
+                # Every section must open with a "**" header line
+                if not line.startswith("**"):
+                    raise ValueError("expected '**' header, got: %s" % line)
+                header_contents = line.split(' ')[1:]
+                layer_name = header_contents[0]
+                num_ops = int(header_contents[1])
+                col_names = header_contents[2:]
+
+                layer_operations = []
+                for _ in range(num_ops):
+                    op_data = table_file.readline().strip().split(' ')
+                    # One numeric value per declared column, after the op name
+                    if len(op_data) - 1 != len(col_names):
+                        raise ValueError("bad row in layer %s: %s" % (layer_name, op_data))
+                    operation_data = defaultdict(str)
+                    operation_data["Name"] = op_data[0]
+                    for col_name, value in zip(col_names, op_data[1:]):
+                        operation_data[col_name] = float(value)
+                    layer_operations.append(operation_data)
+
+                self.__tensor_table[layer_name] = layer_operations
+                line = table_file.readline().strip()
+
+
+    @staticmethod
+    def is_promise(config_layer):
+        """Return True if the first space-separated number is below fp16_swing."""
+        return float(config_layer.split(' ')[0]) < Driver.fp16_swing
+
+    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
+        """Return (time, energy) of converting data between FP16 and FP32.
+
+        curr_layer / prev_layer are Driver.ApproxTypes values. The cost is
+        read from row `h2f_f2h_operation_ind` of the layer's tensor table
+        (columns h2f_* when moving to FP32, f2h_* when moving to FP16).
+        """
+        # No conversion when the type is unchanged or PROMISE is on either
+        # side (smartDMA is assumed to always be on).
+        if curr_layer == prev_layer or Driver.ApproxTypes.PROMISE in (curr_layer, prev_layer):
+            return 0.0, 0.0
+
+        tensor_op_row = self.__tensor_table[layer_data["Name"]][h2f_f2h_operation_ind]
+        if curr_layer == Driver.ApproxTypes.FP32:
+            time, energy = tensor_op_row["h2f_time"], tensor_op_row["h2f_energy"]
+        elif curr_layer == Driver.ApproxTypes.FP16:
+            time, energy = tensor_op_row["f2h_time"], tensor_op_row["f2h_energy"]
+        else:
+            # NOTE(review): PERF has no h2f/f2h columns; previously this path
+            # hit an UnboundLocalError. Treated as "no conversion" -- confirm.
+            return 0.0, 0.0
+        print("Quantization: (%f, %f)" % (time, energy))
+        return (time, energy)
+
+
+    def __run_promise_simulation(self, swing, layer_data):
+        """Run the external ./ptm simulator for one layer; return (time, energy).
+
+        The layer is passed to ptm as matrix dimensions (a Conv layer is
+        flattened first). Exits with status 1 unless the layer is Conv* or FC*.
+        """
+        layer_name = layer_data["Name"]
+        patch_factor = 1
+        if Driver.is_conv(layer_name):
+            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
+                    / (layer_data["Sh"] * layer_data["Sw"])
+            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
+            cols_b = layer_data["Cout"]
+            patch_factor = layer_data["Kh"] * layer_data["Kw"]
+        elif Driver.is_fc(layer_name):
+            rows_a, cols_a, cols_b = layer_data["RA"], layer_data["CA"], layer_data["CB"]
+        else:
+            print("PROMISE can't run whatever this layer is.")
+            sys.exit(1)
+        # TODO ptm must print "<time>,<energy>"; universal_newlines gives text, not bytes
+        args = [rows_a, cols_a, cols_a, cols_b, patch_factor, swing]  # rows_b == cols_a
+        output = subprocess.Popen(["./ptm"] + [str(arg) for arg in args],
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                    universal_newlines=True).communicate()[0]
+        total_time_energy = output.strip().split(',')
+        if len(total_time_energy) != 2:
+            raise ValueError("unexpected ptm output: %r" % output)
+        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
+        return float(total_time_energy[0]), float(total_time_energy[1])
+
+
+    def __run_simulations(self):
+        """Accumulate total time and energy for every configuration.
+
+        The config file is a sequence of blocks: a "+++++" line, a line whose
+        first token is the configuration name, one line per layer (matched
+        to the layer file by position), and a closing "-----" line. A layer
+        line looks like "<n> promise <x> <swing>" or
+        "<n> gpu <op> <fp16|fp32|perf> <num> ...".
+
+        Totals go to self.__aggregate_results[<key>][<configuration index>].
+        """
+        if not os.path.isfile(self.__config_filename):
+            print("ERROR: %s was not found" % self.__config_filename)
+            sys.exit(1)
+
+        approx_types = {"fp16": Driver.ApproxTypes.FP16,
+                        "fp32": Driver.ApproxTypes.FP32,
+                        "perf": Driver.ApproxTypes.PERF}
+        times = self.__aggregate_results[Driver.results_time_key]
+        energies = self.__aggregate_results[Driver.results_energy_key]
+
+        with open(self.__config_filename, "r") as config_file:
+            line = config_file.readline().strip()
+            while line:
+                if not line.startswith("+++++"):
+                    raise ValueError("expected '+++++' header, got: %s" % line)
+                config_file.readline()  # configuration name line (unused)
+                print("CONFIGURATION")
+                line = config_file.readline().strip()
+                layer_ind = 0  # NOTE can also use the leftmost number in the line
+                # Conversion costs are charged relative to an initial FP32 state
+                prev_layer = Driver.ApproxTypes.FP32
+
+                while not line.startswith("-----"):
+                    if not line:
+                        raise ValueError("blank line or end of file inside a configuration")
+                    layer_info = line.split(' ')
+                    layer_data = self.__tensor_layers[layer_ind]
+                    layer_name = layer_data["Name"]
+
+                    if layer_info[1] == "promise":
+                        print("Running layer %s on PROMISE" % layer_name)
+                        swing = int(layer_info[3])
+                        time, energy = self.__run_promise_simulation(swing, layer_data)
+                        print(time, energy)
+                        times[self.__config_count] += time
+                        energies[self.__config_count] += energy
+                        prev_layer = Driver.ApproxTypes.PROMISE
+                    elif layer_info[1] == "gpu":
+                        # Tokens after "gpu" come in (op, approx type, number) triples
+                        # TODO not portable bc there can be multiple numbers after each approx later on
+                        total_time = 0
+                        total_energy = 0
+                        for tensor_ind, i in enumerate(range(2, len(layer_info), 3)):
+                            approx_type = layer_info[i + 1]
+                            approx_num = layer_info[i + 2]  # only matters if perf
+                            if approx_type not in approx_types:
+                                raise ValueError("unknown approximation type: %s" % approx_type)
+                            curr_layer = approx_types[approx_type]
+                            # Charge the precision-conversion cost too; it used to be
+                            # computed and then dropped
+                            quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data)
+                            time, energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind, approx_num)
+                            total_time += quant_time + time
+                            total_energy += quant_energy + energy
+                            prev_layer = curr_layer
+                        times[self.__config_count] += total_time
+                        energies[self.__config_count] += total_energy
+                    layer_ind += 1
+                    line = config_file.readline().strip()
+
+                self.__config_count += 1
+                line = config_file.readline().strip()
+
+
+    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, approx_num):
+        """Return (time, energy) of one GPU tensor op from the tensor table.
+
+        Reads columns fp32_*, fp16_* or perf<approx_num>_*; else ValueError.
+        """
+        if curr_layer == Driver.ApproxTypes.FP32:
+            prefix = "fp32"
+        elif curr_layer == Driver.ApproxTypes.FP16:
+            prefix = "fp16"
+        elif curr_layer == Driver.ApproxTypes.PERF:
+            prefix = "perf%s" % approx_num
+        else:
+            raise ValueError("no GPU cost columns for approx type %r" % curr_layer)
+        tensor_info = self.__tensor_table[layer_name][tensor_ind]
+        time, energy = tensor_info[prefix + "_time"], tensor_info[prefix + "_energy"]
+        print("GPU: (%f, %f)" % (time, energy))
+        return time, energy
+
+
+    def __display_results(self):
+        """Write per-configuration totals to the results file.
+
+        For Time and then Energy: a CSV section "Configuration,Total,Improvement"
+        (Improvement = configuration 0's total / this total), followed by the
+        index and total of the configuration with the smallest total.
+        """
+        with open(self.__results_filename, "w") as results_file:
+            for attribute in (Driver.results_time_key, Driver.results_energy_key):
+                results_file.write("%s\n" % attribute)
+                results_file.write("Configuration,Total,Improvement\n")
+
+                totals = self.__aggregate_results[attribute]
+                baseline_val = totals[0]  # configuration 0 is the baseline
+                print(baseline_val)
+                for config_ind in range(self.__config_count):
+                    # Using repr to keep all decimal digits when writing to file;
+                    # the 0.0001 keeps the ratio finite when a total is 0
+                    results_file.write("c%d,%s,%s\n" % (config_ind, repr(totals[config_ind]),
+                            repr(baseline_val / (totals[config_ind] + 0.0001))))
+
+                if self.__config_count:  # nothing to rank for an empty config file
+                    # `min` compares values directly, so a total of 0.0 can win;
+                    # ties go to the earliest configuration
+                    best_config = min(range(self.__config_count), key=totals.__getitem__)
+                    results_file.write("\nc%d,%s\n\n" % (best_config, repr(totals[best_config])))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:  # script name + the four file paths
+        print("Usage: python %s <layer info> <tensor info> <configurations> <results file>" % sys.argv[0])
+        sys.exit(1)
+    Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()