diff --git a/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py b/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py
deleted file mode 100644
index 6ff0e643f26e1120b70003f62d88a0a0160423ce..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py
+++ /dev/null
@@ -1,505 +0,0 @@
-from collections import defaultdict
-import os
-import subprocess
-import sys
-
-class Driver:
-    fp16_swing = 8
-
-    class PrecisionTypes:
-        FP16 = 0
-        FP32 = 1
-        PROMISE = 2
-
-    class ApproxTypes:
-        PERF = 3 
-        SAMP = 4
-	REDUCE = 5
-	FFT = 6
-
-    results_time_key = "Time"
-    results_energy_key = "Energy"
-
-
-    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
-        self.__layer_filename = layer_filename
-        self.__table_filename = table_filename
-        self.__config_filename = config_filename
-        self.__results_filename = results_filename
-
-        # NOTE: Use an OrderedDict if we want to search by operation name 
-        # Using a list bc we care about the order the data is read in
-        # since it corresponds to the data in the configuration file
-        self.__tensor_layers = []
-
-        # [layer_name][operation_name][cols] 
-        # Operation names need to be stored in order of insertion 
-        self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
-
-        self.__conf_results = [] # indexed 
-        #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
-
-
-    @staticmethod
-    def is_conv(operation_name):
-        return operation_name.startswith("Conv")
-
-
-    @staticmethod
-    def is_nml(operation_name):
-        return operation_name.startswith("NML")
-
-
-    @staticmethod
-    def is_fc(operation_name):
-        return operation_name.startswith("FC")
-
-
-    # FOR DEBUGGING ONLY
-    def __get_str(self, appr):
-        if appr == Driver.PrecisionTypes.FP16:
-            return "FP16"
-        elif appr == Driver.PrecisionTypes.FP32:
-            return "FP32"
-        elif appr == Driver.PrecisionTypes.PROMISE:
-            return "PROMISE"
-        elif appr == Driver.ApproxTypes.PERF:
-            return "PERF"
-        elif appr == Driver.ApproxTypes.SAMP:
-            return "SAMP"
-	elif appr == Driver.ApproxTypes.REDUCE: return "REDUCE"
-	elif appr == Driver.ApproxTypes.FFT: return "FFT"
-
-    def driver(self):
-        self.__parse_tensor_layer_file()
-        self.__parse_tensor_table()
-        self.__run_simulations()
-        self.__write_output()
-
-
-    def __parse_tensor_layer_file(self): 
-    	if self.__layer_filename == "NONE": return
-        if not os.path.isfile(self.__layer_filename):
-            print("ERROR: %s was not found." % self.__layer_filename)
-            exit(1)
-        layer_file = open(self.__layer_filename, "r")
-        for line in layer_file:
-            layer_data = line.strip().split(',')
-            layer_name = layer_data[0]
-
-            tensor_layer = defaultdict(str)
-            tensor_layer["Name"] = layer_name
-
-            if Driver.is_conv(layer_name):
-                tensor_layer["N"] = float(layer_data[1])
-                tensor_layer["Cin"] = float(layer_data[2])
-                tensor_layer["H"] = float(layer_data[3])
-                tensor_layer["W"] = float(layer_data[4])
-                tensor_layer["Cout"] = float(layer_data[5])
-                tensor_layer["Kh"] = float(layer_data[7])
-                tensor_layer["Kw"] = float(layer_data[8])
-                tensor_layer["Sh"] = float(layer_data[9])
-                tensor_layer["Sw"] = float(layer_data[10])
-
-            elif Driver.is_fc(layer_name):
-                tensor_layer["RA"] = float(layer_data[1])
-                tensor_layer["CA"] = float(layer_data[2])
-                tensor_layer["RB"] = float(layer_data[3])
-                tensor_layer["CB"] = float(layer_data[4])
-
-            elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs?
-                print("ERROR: Invalid layer name %s" % layer_name)
-                exit(1)
-
-            self.__tensor_layers.append(tensor_layer)
-        layer_file.close()
-
-
-    def __parse_tensor_table(self): 
-        if not os.path.isfile(self.__table_filename):
-            print("ERROR: %s was not found." % self.__table_filename)
-            exit(1)
-        table_file = open(self.__table_filename, "r")
-        line = table_file.readline().strip()
-
-        while line:
-            # Line here MUST be a header or there's a bug 
-            # Get the description of the layer 
-            assert(line.startswith("**"))
-            header_contents = line.split(' ')[1:] 
-            layer_name = header_contents[0]
-            num_ops = int(header_contents[1])
-            col_names = header_contents[2:]
-
-            layer_operations = []
-
-            # Go through all operations in the layer
-            for op_count in range(num_ops):
-                operation_data = defaultdict(str)
-
-                line = table_file.readline().strip()
-                op_data = line.split(' ')
-                op_name = op_data[0]
-                operation_data["Name"] = op_name
-
-                # Number of data items (#s) needs to match up with the # of cols 
-                assert(len(op_data) - 1 == len(col_names))
-
-                # Go through all data items (each col element) per operation 
-                for i in range(len(col_names)):
-                    operation_data[col_names[i]] = float(op_data[i + 1])
-
-                layer_operations.append(operation_data)
-
-            self.__tensor_table[layer_name] = layer_operations
-            line = table_file.readline().strip()
-        table_file.close()
-
-
-    @staticmethod
-    def is_promise(layer_hardware):
-        return layer_hardware == "promise"
-
-    @staticmethod
-    def is_gpu(layer_hardware):
-        return layer_hardware == "gpu"
-
-    def is_fp32(self, line): 
-    	return line.find("fp32") != -1 or line.find("red_samp 41") != -1 or line.find("red_samp 43") != -1 or line.find("red_samp 45") != -1
-
-    def __run_simulations(self):
-        config_file = open(self.__config_filename, "r")
-        line = config_file.readline().strip()
-
-        while line: 
-            assert(line == "+++++")
-            print("CONFIGURATION")
-           
-            curr_conf_results = []
-
-            prev_layer = Driver.PrecisionTypes.FP32
-            curr_layer = None
-
-            line = config_file.readline().strip()
-            first_line = line
-            conf_name = line.split(' ')[0]
-            print("CONF NAME: %s" % conf_name)
-            assert(conf_name.startswith("conf"))
-            line = config_file.readline().strip()
-
-            while line != "-----":
-                layer_as_lst = line.split(' ')
-                layer_results = []
-                # Skip softmax
-                if line.find("softmax") != -1:
-                    layer_results.append((0, 0, ' '.join(layer_as_lst[2:])))
-                    curr_conf_results.append((layer_as_lst[1], layer_results))
-                    line = config_file.readline().strip()
-                    continue
-                layer_ind = int(layer_as_lst[0]) - 1
-                layer_table_data = self.__tensor_layers[layer_ind]
-                layer_name = layer_table_data["Name"]
-
-                if Driver.is_gpu(layer_as_lst[1]):
-                    print("Running layer %s on the GPU" % layer_name)
-
-                    tensor_count = 0 
-
-                    # 3 elements per tensor operation 
-                    for i in range(2, len(layer_as_lst), 3):
-                        op_type = layer_as_lst[i] # map2 fp32 1
-                        precision_type = layer_as_lst[i + 1] # or approx type 
-                        op_number = layer_as_lst[i + 2]
-                        print(' '.join(layer_as_lst[i : i + 3]))
-
-                        approx_type = None
-                        if self.is_fp32(line): #line.find("fp32") != -1 or line.find("red_samp 41") != -1 or line.find("red_samp 43") != -1 or line.find("red_samp 45") != -1:
-                            curr_layer = Driver.PrecisionTypes.FP32
-			    print("FP32")
-                        else: 
-                            curr_layer = Driver.PrecisionTypes.FP16
-			    print("FP16")
-                        if precision_type == "perf" or precision_type == "samp" or precision_type == "red_samp": # Handle approx type
-                            if precision_type == "perf": 
-                                approx_type = Driver.ApproxTypes.PERF
-                            elif precision_type == "samp": 
-                                approx_type = Driver.ApproxTypes.SAMP
-			    elif precision_type == "red_samp": 
-			    	precision_type = "reduce"
-				approx_type = Driver.ApproxTypes.REDUCE
-			    elif precision_type == "fft":
-			    	approx_type = Driver.ApproxTypes.FFT
-
-                        quant_time, quant_energy = 0, 0#self.__quantize(precision_type, op_number, curr_layer, prev_layer, tensor_count, layer_table_data)
-                        if quant_time != 0:
-                            assert i == 2 #and layer_ind == 0
-                        conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \
-                                    tensor_count, approx_type, op_number)
-			print(quant_time, conv_time)
-                        layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3])))
-                        prev_layer = curr_layer
-                        tensor_count += 1
-
-                line = config_file.readline().strip()
-                prev_layer = curr_layer
-                curr_conf_results.append((layer_as_lst[1], layer_results))
-
-            if not self.__conf_results: # we're appending the baseline
-                # need to find the fp16 baseline
-                self.fp16_baseline = []
-
-                prev_layer = Driver.PrecisionTypes.FP32
-                curr_layer = None
-
-                has_quantized = False
-                for layer_ind, (hardware, layer) in enumerate(curr_conf_results):
-                    if layer[0][2].find("softmax") != -1: continue
-                    fp16_layer = []
-                    layer_table_data = self.__tensor_layers[layer_ind]
-                    layer_name = layer_table_data["Name"]
-
-                    for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer): 
-                        curr_layer = Driver.PrecisionTypes.FP16 # always
-
-                        quant_time, quant_energy = self.__quantize("fp16", "1", curr_layer, prev_layer, tensor_ind, layer_table_data)
-                        if quant_time != 0:
-                            assert not has_quantized
-                            has_quantized = True
-                        tensor_info = self.__tensor_table[layer_name][tensor_ind]
-                        fp16_time = tensor_info["fp16_time"] + quant_time
-                        fp16_energy = tensor_info["fp16_energy"] + quant_energy
-                        fp16_layer.append((fp16_time, fp16_energy, tensor_op.replace("fp32", "fp16")))  
-                        prev_layer = curr_layer
-
-                    prev_layer = curr_layer
-                    self.fp16_baseline.append((hardware, fp16_layer))
-            self.__conf_results.append( (first_line, curr_conf_results) )
-            line = config_file.readline().strip()
-        config_file.close()
-
-
-    def __quantize(self, precision_type, op_number, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
-        if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \
-                    or prev_layer == Driver.PrecisionTypes.PROMISE:
-            return 0.0, 0.0
-        layer_name = layer_data["Name"]
-	print("DATA: ", precision_type, op_number, self.__get_str(curr_layer), self.__get_str(prev_layer), h2f_f2h_operation_ind)
-        # NOTE: Ignoring logic where curr == promise or prev == promise bc 
-        # smartDMA is always true so we'd return near the beginning of the method
-
-        # Get h2f/f2h data using the first tensor operation in the layer
-        # (which is why order matters in the tensor table)
-        tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]  
-        time_key = None
-        energy_key = None
-
-	print("prec type and op number:", precision_type, op_number)
-	if op_number == "1": lookup_key = "_" #lookup_key = precision_type
-	else: lookup_key = "_" + precision_type + str(op_number) + "_"
-	print(lookup_key)	
-        if curr_layer == Driver.PrecisionTypes.FP32:
-	    lookup_key = lookup_key.replace("41", "42").replace("43", "44").replace("45", "46")
-            time_key = "h2f%stime" % lookup_key
-            energy_key = "h2f%senergy" % lookup_key
-        elif curr_layer == Driver.PrecisionTypes.FP16:
-            time_key = "f2h%stime" % lookup_key
-            energy_key = "f2h%senergy" % lookup_key
-
-	print("QUANTIZATION KEYS", time_key, energy_key)
-        time = tensor_op_row[time_key]
-        energy = tensor_op_row[energy_key]
-	#if not time: time = 0.0
-	#if not energy: energy = 0.0
-	print("QUANTIZATION TIME AND ENERGY", time, energy)
-        return (time, energy)
-
-
-    def __run_promise_simulation(self, swing, layer_data):
-        layer_name = layer_data["Name"] 
-        patch_factor = 1 
-
-        if Driver.is_conv(layer_name): 
-            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
-                    / (layer_data["Sh"] * layer_data["Sw"])
-            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
-            rows_b = cols_a
-            cols_b = layer_data["Cout"]
-            patch_factor = layer_data["Kh"] * layer_data["Kw"]
-        elif Driver.is_fc(layer_name):
-            rows_a = layer_data["RA"] 
-            cols_a = layer_data["CA"]
-            rows_b = layer_data["RB"] 
-            cols_b = layer_data["CB"]
-        else:
-            print("PROMISE can't run whatever this layer is.")
-            exit(1)
-        # Run promise simulator
-        # TODO need to print time and energy in the ptm runner so we can pipe it
-        output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \
-                    str(cols_b), str(patch_factor), str(swing)], \
-                    stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
-        total_time_energy = output.strip().split(',')
-
-        assert(len(total_time_energy) == 2)
-        return float(total_time_energy[0]), float(total_time_energy[1])
-
-
-    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \
-                    approx_type = None, knob_number = None):
-        tensor_info = self.__tensor_table[layer_name][tensor_ind]
-        time_key = None
-        energy_key = None
-
-        if approx_type == Driver.ApproxTypes.PERF or approx_type == Driver.ApproxTypes.SAMP or approx_type == Driver.ApproxTypes.REDUCE or approx_type == Driver.ApproxTypes.FFT: 
-            approx_type_str = None
-            if approx_type == Driver.ApproxTypes.PERF:
-                approx_type_str = "perf"
-            elif approx_type == Driver.ApproxTypes.SAMP: 
-                approx_type_str = "samp"
-	    elif approx_type == Driver.ApproxTypes.REDUCE: 
-	    	approx_type_str = "reduce"
-            elif approx_type == Driver.ApproxTypes.FFT:
-		approx_type_str = "reduce"
-
-            if curr_layer == Driver.PrecisionTypes.FP32:
-                time_key = "fp32_%s%s_time" % (approx_type_str, knob_number)
-                energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number)
-
-            elif curr_layer == Driver.PrecisionTypes.FP16:
-                time_key = "fp16_%s%s_time" % (approx_type_str, knob_number)
-                energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number)
-
-        else: # None for now
-            if curr_layer == Driver.PrecisionTypes.FP32:
-                time_key = "fp32_time"
-                energy_key = "fp32_energy"
-
-            elif curr_layer == Driver.PrecisionTypes.FP16:
-                time_key = "fp16_time"
-                energy_key = "fp16_energy"
-        print("GPU SIMULATION KEYS: ", time_key, energy_key)
-        conversion_time = tensor_info[time_key]
-        conversion_energy = tensor_info[energy_key]
-        print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy))
-        return conversion_time, conversion_energy
-
-
-    def __write_output(self):
-        config_file = open(self.__config_filename, "r")
-        results_file = open(self.__results_filename, "w")
-
-        def write_conf_to_file(conf_name, final_conf, time_speedup, energy_speedup):
-            # conf = [layer value if promise], [tensor vals if gpu]]
-            conf_str = ["+++++"]
-
-            # process the first line
-            first_line, layers = final_conf
-            first_line_lst = first_line.split(' ')
-            assert first_line_lst[0] == conf_name
-            
-            new_header = [conf_name]
-            new_header.append(repr(time_speedup))
-            new_header.append(repr(energy_speedup))
-            new_header.append(repr(abs(float(first_line_lst[-2]))))
-            new_header.append(repr(abs(float(first_line_lst[-1]))))
-            conf_str.append(' '.join(new_header))
-           
-            for ind, (hardware, layer) in enumerate(layers):
-                layer_lst = [str(ind + 1)]
-                layer_lst.append(hardware)
-                for op_time, op_energy, tensor_op in layer:
-                    layer_lst.append(tensor_op) 
-                conf_str.append(' '.join(layer_lst))
-            conf_str.append("-----\n")
-            results_file.write('\n'.join(conf_str))
-
-        fp32_baseline_conf = None
-        baseline_total_time = baseline_total_energy = 0 
-
-        def get_baseline_times_energies(conf):
-            curr_time = curr_energy = 0
-            for hardware, layer in conf[1]:
-                for op_time, op_energy, tensor_op in layer:
-                    curr_time += op_time
-                    curr_energy += op_energy
-            return curr_time, curr_energy
-
-        def get_final_times_energies_conf(curr_conf, curr_conf_name):
-            final_time = final_energy = 0
-
-            final_conf = [] # List (conf) of lists (layers) of tuples (operation data)
-
-            for layer_ind, (hardware, layer) in enumerate(curr_conf[1]):
-                final_conf_layer = []
-
-                for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
-                    if tensor_op.find("softmax") != -1:
-                        final_conf_layer.append((None, None, tensor_op))
-                        continue
-		     # is promise --> add everything in that layer 
-                    if tensor_op.find("promise") != -1: 
-		    	assert False 
-                    else: # look at the individual tensor operation as before
-			if self.is_fp32(tensor_op): #tensor_op.find("fp32") != -1:
-				baseline_layer = fp32_baseline_conf[1][layer_ind][1]
-				print(baseline_layer)
-				print("FP32")
-			else:
-				baseline_layer = self.fp16_baseline[layer_ind][1]
-				print("FP16")
-                        baseline_time = baseline_layer[tensor_ind][0]
-                        baseline_energy = baseline_layer[tensor_ind][1]
-                        baseline_op = baseline_layer[tensor_ind][2]
-			print("Baseline time: ", baseline_time)
-			print("Baseline energy: ", baseline_energy)
-			print("Baseline op: ", baseline_op)
-                    final_tensor_op = tensor_op
-                    if op_time > baseline_time:
-                        print("**************** BIGGER ******************")
-			print("Curr conf name: ", curr_conf_name)
-                        print("Baseline data: ", baseline_time, baseline_energy, baseline_op, layer_ind)
-                        print("Curr conf data: ", op_time, tensor_op, layer_ind)
-                        final_time += baseline_time
-                        final_energy += baseline_energy
-                        final_tensor_op = baseline_op
-                    else:
-		    	#print("*************** NOT BIGGER ****************")
-			#print("Curr conf name: ", curr_conf_name)
-                        #print("Baseline data: ", baseline_time, baseline_energy, baseline_op, layer_ind)
-			#print("Curr conf data: ", op_time, tensor_op, layer_ind)
-                        final_time += op_time
-                        final_energy += op_energy
-		    print("\n")
-                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
-                final_conf.append((hardware, final_conf_layer))
-            print("\n")
-            return final_time, final_energy, (curr_conf[0], final_conf) 
-
-        conf_index = 0
-        print("RESULTS")
-        for line in config_file:
-            if line.startswith("conf"):
-                orig_line_lst = line.split(' ')
-                conf_name = orig_line_lst[0]
-
-                if not fp32_baseline_conf:
-                    fp32_baseline_conf = self.__conf_results[conf_index] #conf_name]
-		    print("BASELINE")
-		    print(fp32_baseline_conf)
-                    baseline_total_time, baseline_total_energy = get_baseline_times_energies(fp32_baseline_conf)
-                    results_file.write("%s\n" % repr(baseline_total_time))
-                    write_conf_to_file(conf_name, fp32_baseline_conf, 1, 1)
-                else:
-                    curr_conf = self.__conf_results[conf_index] #conf_name]
-                    #final_time, final_energy, = get_baseline_times_energies(curr_conf)
-                    final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf, conf_name)
-                    write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy) 
-                conf_index += 1
-        results_file.close()
-        config_file.close()
-
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print("Usage: python driver.py <layer info or NONE> <tensor info> <configurations> <results file>")
-        exit(1)
-    Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()
diff --git a/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py b/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py
deleted file mode 100644
index 3095d15aba0757aca3b74705ba57b5e189b5cecb..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Fixes table format
-# Remove all instances of cifar10 --> each col should start with fp16 or fp32
-# Combine multiple tables
-
-def fix_columns(table_name, new_filename):
-    table_file = open(table_name, "r")
-
-    new_table_file = []
-
-    for line in table_file:
-        line = line.strip()
-        if line.startswith("**"):
-            col_names = line.split()
-            new_col_names = []
-
-            for col_name in col_names:
-                if col_name.find("fp16") != -1: 
-                    new_col_names.append(col_name[col_name.find("fp16") : ])
-                elif col_name.find("fp32") != -1:
-                    new_col_names.append(col_name[col_name.find("fp32") : ])
-                else:
-                    new_col_names.append(col_name)
-            new_table_file.append(' '.join(new_col_names))
-        else:
-            new_table_file.append(line)
-    table_file.close()
-    table_file_new = open(new_filename, "w")
-    table_file_new.write('\n'.join(new_table_file))
-    table_file_new.close()
-
-def combine_tables(table1, table2, new_filename):
-    table1_file = open(table1, "r")
-    table2_file = open(table2, "r")
-
-    table1_data = table1_file.read().strip().split('\n')
-    table2_data = table2_file.read().strip().split('\n')
-    new_contents = []
-
-    table2_ind = 0
-    for table1_line in table1_data:
-        table2_line = table2_data[table2_ind]
-
-        if table1_line.startswith("**"):
-            assert table2_line.startswith("**")
-            table2_lst = table2_line.strip().split()
-            table2_cols = ' '.join(table2_lst[3 : ])
-            new_contents.append(table1_line + ' ' + table2_cols)
-        else:
-            table2_lst = table2_line.strip().split()
-            table2_cols = ' '.join(table2_lst[1 : ])
-            new_contents.append(table1_line + ' ' + table2_cols)
-        table2_ind += 1
-
-    table1_file.close()
-    table2_file.close()
-
-    new_file = open(new_filename, "w")
-    new_file.write('\n'.join(new_contents))
-    new_file.close()
-
-import sys
-
-if __name__ == "__main__":
-    num_args = len(sys.argv)
-
-    if num_args != 4 and num_args != 5:
-        print("python table_fixer.py <fix> <filename> OR <combine> <table1> <table2> <new name>")
-        exit(1)
-    elif sys.argv[1] == "fix":
-        fix_columns(sys.argv[2], sys.argv[3])
-    elif sys.argv[1] == "combine":
-        combine_tables(sys.argv[2], sys.argv[3], sys.argv[4])
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc
deleted file mode 100644
index c746e5de6116f701df7370f93969d40486e04e90..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* CIFAR-10 DNN ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 1000;
-
-  uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
-  
-  void* input = readTrainedWeights("../model_params/cifar_keras/input.bin",
-			  	   float_type,
-				   test_batch_size, 3, 32, 32);
-
-  void* conv1_filter = readTrainedWeights("../model_params/cifar_keras/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/cifar_keras/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/cifar_keras/conv2.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/cifar_keras/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);
-
-  void* conv3_filter = readTrainedWeights("../model_params/cifar_keras/conv3.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/cifar_keras/conv3_bias.bin",
-					float_type, 1, 128, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/cifar_keras/conv4.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/cifar_keras/conv4_bias.bin",
-					float_type, 1, 128, 1, 1);
-
-  
-  void* fc1_weights = readTrainedWeights("../model_params/cifar_keras/fc1.bin",
-					 float_type, 1, 1, 2048, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/cifar_keras/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/cifar_keras/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/cifar_keras/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* conv1_tanh = tensorTanh(conv1out);
-
-
-    // 2nd Layer
-    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* conv2_tanh = tensorTanh(conv2out);
-
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-      
-
-    // 3rd Layer
-    void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv3out, conv3_bias); // NOTE: In place operation
-  
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    void* pool3out = tensorPooling(conv3_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-
-    // 4th Layer
-    void* conv4out = tensorConvolution(pool3out, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv4out, conv4_bias); // NOTE: In place operation
-  
-    void* conv4_tanh = tensorTanh(conv4out);
-
-    void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-
-    printTensorDims(pool4out);
-    
- 
-    void* gemm1out = tensorGemmGPU(pool4out, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorTanh(gemm1biasout);
-  
-    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    printTensorDims(result);
-    
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc
deleted file mode 100644
index b64d52678238825fe6e6368d1d15f7958c3759aa..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc
+++ /dev/null
@@ -1,181 +0,0 @@
-
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_cpu_runtime.h"
-#include "../include/utils_cpu.h"
-#include "../include/types.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  int total_runs = 100;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* CIFAR-10 DNN ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 100;
-
-  //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
-  uint8_t* labels = readLabels("./model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
-    
-  void* input = readTrainedWeightsCPU("./model_params/alexnet_cifar10/norm_cifar_input.bin",
-			  	   float_type,
-				   test_batch_size, 3, 32, 32);
-
-  void* conv1_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv1.bin",
-					  float_type, 64, 3, 11, 11);  
-  void* conv1_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv1_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv2_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv2.bin",
-					  float_type, 192, 64, 5, 5);  
-  void* conv2_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv2_bias.bin",
-					float_type, 1, 192, 1, 1);
-
-  void* conv3_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv3.bin",
-					  float_type, 384, 192, 3, 3);  
-  void* conv3_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv3_bias.bin",
-					float_type, 1, 384, 1, 1);  
-  void* conv4_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv4.bin",
-					  float_type, 256, 384, 3, 3);  
-  void* conv4_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv4_bias.bin",
-					float_type, 1, 256, 1, 1);
-  void* conv5_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv5.bin",
-					  float_type, 256, 256, 3, 3);  
-  void* conv5_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv5_bias.bin",
-					float_type, 1, 256, 1, 1);
-  
-  void* fc1_weights = readTrainedWeightsCPU("./model_params/alexnet_cifar10/fc1.bin",
-					 float_type, 1, 1, 4096, 10);  
-  void* fc1_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvolutionCPU(input, conv1_filter, 5, 5, 1, 1,
-				       conv_mode, conv_precision);
-
-    tensorAddCPU(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* conv1_tanh = tensorTanhCPU(conv1out);
-
-    void* pool1out = tensorPoolingCPU(conv1_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // 2nd Layer
-    void* conv2out = tensorConvolutionCPU(pool1out, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAddCPU(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* conv2_tanh = tensorTanhCPU(conv2out);
-
-    void* pool2out = tensorPoolingCPU(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-      
-
-    // 3rd Layer
-    void* conv3out = tensorConvolutionCPU(pool2out, conv3_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAddCPU(conv3out, conv3_bias); // NOTE: In place operation
-  
-    void* conv3_tanh = tensorTanhCPU(conv3out);
-
-    // 4th Layer
-    void* conv4out = tensorConvolutionCPU(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAddCPU(conv4out, conv4_bias); // NOTE: In place operation
-  
-    void* conv4_tanh = tensorTanhCPU(conv4out);
-    
-    // 5th Layer
-    void* conv5out = tensorConvolutionCPU(conv4_tanh, conv5_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAddCPU(conv5out, conv5_bias); // NOTE: In place operation
-  
-    void* conv5_tanh = tensorTanhCPU(conv5out);
-
-    void* pool5out = tensorPoolingCPU(conv5_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // final FC Layer
-    void* gemm1out = tensorGemmCPU(pool5out, fc1_weights);  
-
-    void* gemm1biasout = tensorAddCPU(gemm1out, fc1_bias);
-
-    void* result = tensorSoftmaxCPU(gemm1biasout);
-  
-    computeAccuracy2(labels, test_batch_size, result);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc
deleted file mode 100644
index df540551d71814bf4bf18d349bf08cb03151e1dc..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc
+++ /dev/null
@@ -1,253 +0,0 @@
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_cpu_runtime.h"
-#include "../include/types.h"
-#include "../include/utils_cpu.h"
-int main() {
-
-  llvm_hpvm_initTensorRt(0);
-
-  std::string dir_prefix = std::string("./model_params/resnet18_cifar10_3/");
-  std::string input_path = dir_prefix + std::string("input.bin");
-  // void* input = readTrainedWeightsCPU(input_path.c_str(), 0, batch_size,3,32,32);
-  std::string labels_path = dir_prefix + std::string("labels.bin");
-  
-  std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
-  void *conv2d_1_w =
-      readTrainedWeightsCPU(conv2d_1_w_path.c_str(), 0, 16, 3, 3, 3);
-  std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
-  void *conv2d_1_b =
-      readTrainedWeightsCPU(conv2d_1_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
-  void *conv2d_2_w =
-      readTrainedWeightsCPU(conv2d_2_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
-  void *conv2d_2_b =
-      readTrainedWeightsCPU(conv2d_2_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
-  void *conv2d_3_w =
-      readTrainedWeightsCPU(conv2d_3_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
-  void *conv2d_3_b =
-      readTrainedWeightsCPU(conv2d_3_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
-  void *conv2d_4_w =
-      readTrainedWeightsCPU(conv2d_4_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
-  void *conv2d_4_b =
-      readTrainedWeightsCPU(conv2d_4_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
-  void *conv2d_5_w =
-      readTrainedWeightsCPU(conv2d_5_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
-  void *conv2d_5_b =
-      readTrainedWeightsCPU(conv2d_5_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin");
-  void *conv2d_6_w =
-      readTrainedWeightsCPU(conv2d_6_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin");
-  void *conv2d_6_b =
-      readTrainedWeightsCPU(conv2d_6_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin");
-  void *conv2d_7_w =
-      readTrainedWeightsCPU(conv2d_7_w_path.c_str(), 0, 16, 16, 3, 3);
-  std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin");
-  void *conv2d_7_b =
-      readTrainedWeightsCPU(conv2d_7_b_path.c_str(), 0, 1, 16, 1, 1);
-  std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin");
-  void *conv2d_8_w =
-      readTrainedWeightsCPU(conv2d_8_w_path.c_str(), 0, 32, 16, 3, 3);
-  std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin");
-  void *conv2d_8_b =
-      readTrainedWeightsCPU(conv2d_8_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin");
-  void *conv2d_10_w =
-      readTrainedWeightsCPU(conv2d_10_w_path.c_str(), 0, 32, 16, 1, 1);
-  std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin");
-  void *conv2d_10_b =
-      readTrainedWeightsCPU(conv2d_10_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin");
-  void *conv2d_9_w =
-      readTrainedWeightsCPU(conv2d_9_w_path.c_str(), 0, 32, 32, 3, 3);
-  std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin");
-  void *conv2d_9_b =
-      readTrainedWeightsCPU(conv2d_9_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin");
-  void *conv2d_11_w =
-      readTrainedWeightsCPU(conv2d_11_w_path.c_str(), 0, 32, 32, 3, 3);
-  std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin");
-  void *conv2d_11_b =
-      readTrainedWeightsCPU(conv2d_11_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin");
-  void *conv2d_12_w =
-      readTrainedWeightsCPU(conv2d_12_w_path.c_str(), 0, 32, 32, 3, 3);
-  std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin");
-  void *conv2d_12_b =
-      readTrainedWeightsCPU(conv2d_12_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin");
-  void *conv2d_13_w =
-      readTrainedWeightsCPU(conv2d_13_w_path.c_str(), 0, 32, 32, 3, 3);
-  std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin");
-  void *conv2d_13_b =
-      readTrainedWeightsCPU(conv2d_13_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin");
-  void *conv2d_14_w =
-      readTrainedWeightsCPU(conv2d_14_w_path.c_str(), 0, 32, 32, 3, 3);
-  std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin");
-  void *conv2d_14_b =
-      readTrainedWeightsCPU(conv2d_14_b_path.c_str(), 0, 1, 32, 1, 1);
-  std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin");
-  void *conv2d_15_w =
-      readTrainedWeightsCPU(conv2d_15_w_path.c_str(), 0, 64, 32, 3, 3);
-  std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin");
-  void *conv2d_15_b =
-      readTrainedWeightsCPU(conv2d_15_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin");
-  void *conv2d_17_w =
-      readTrainedWeightsCPU(conv2d_17_w_path.c_str(), 0, 64, 32, 1, 1);
-  std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin");
-  void *conv2d_17_b =
-      readTrainedWeightsCPU(conv2d_17_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin");
-  void *conv2d_16_w =
-      readTrainedWeightsCPU(conv2d_16_w_path.c_str(), 0, 64, 64, 3, 3);
-  std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin");
-  void *conv2d_16_b =
-      readTrainedWeightsCPU(conv2d_16_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin");
-  void *conv2d_18_w =
-      readTrainedWeightsCPU(conv2d_18_w_path.c_str(), 0, 64, 64, 3, 3);
-  std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin");
-  void *conv2d_18_b =
-      readTrainedWeightsCPU(conv2d_18_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin");
-  void *conv2d_19_w =
-      readTrainedWeightsCPU(conv2d_19_w_path.c_str(), 0, 64, 64, 3, 3);
-  std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin");
-  void *conv2d_19_b =
-      readTrainedWeightsCPU(conv2d_19_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin");
-  void *conv2d_20_w =
-      readTrainedWeightsCPU(conv2d_20_w_path.c_str(), 0, 64, 64, 3, 3);
-  std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin");
-  void *conv2d_20_b =
-      readTrainedWeightsCPU(conv2d_20_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin");
-  void *conv2d_21_w =
-      readTrainedWeightsCPU(conv2d_21_w_path.c_str(), 0, 64, 64, 3, 3);
-  std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin");
-  void *conv2d_21_b =
-      readTrainedWeightsCPU(conv2d_21_b_path.c_str(), 0, 1, 64, 1, 1);
-  std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
-  void *dense_1_w =
-      readTrainedWeightsCPU(dense_1_w_path.c_str(), 0, 1, 1, 64, 10);
-  std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
-  void *dense_1_b =
-      readTrainedWeightsCPU(dense_1_b_path.c_str(), 0, 1, 10, 1, 1);
-
-  int test_input_size = 10000;
-  int batch_size = 100;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  for (int i = 0; i < batch_count; i++) {
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-
-    void* input = readTrainedWeightsCPU(input_path.c_str(), 0,batch_size,3,32,32);
-    uint8_t *labels = readLabels(labels_path.c_str(), batch_size);
-    
-    void* var_2 = tensorConvolutionCPU(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-    void* var_3 = tensorAddCPU(var_2, conv2d_1_b); 
-    void* var_4 = tensorReluCPU(var_3); 
-    void* var_6 = tensorConvolutionCPU(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-    void* var_7 = tensorAddCPU(var_6, conv2d_2_b); 
-    void* var_8 = tensorReluCPU(var_7); 
-    void* var_10 = tensorConvolutionCPU(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-    void* var_11 = tensorAddCPU(var_10, conv2d_3_b); 
-    void* var_12 = tensorAddCPU(var_4, var_11); 
-    void *var_13 = tensorReluCPU(var_12); 
-    void* var_15 = tensorConvolutionCPU(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-    void* var_16 = tensorAddCPU(var_15, conv2d_4_b); 
-    void* var_17 = tensorReluCPU(var_16); 
-    void* var_19 = tensorConvolutionCPU(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-    void* var_20 = tensorAddCPU(var_19, conv2d_5_b); 
-    void* var_21 = tensorAddCPU(var_13, var_20); 
-    void* var_22 = tensorReluCPU(var_21); 
-    void* var_24 = tensorConvolutionCPU(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-    void* var_25 = tensorAddCPU(var_24, conv2d_6_b); 
-    void *var_26 = tensorReluCPU(var_25); 
-    void* var_28 = tensorConvolutionCPU(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-    void* var_29 = tensorAddCPU(var_28, conv2d_7_b); 
-    void *var_30 = tensorAddCPU(var_22, var_29); 
-    void *var_31 = tensorReluCPU(var_30); 
-    void* var_33 = tensorConvolutionCPU(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); 
-    void *var_34 = tensorAddCPU(var_33, conv2d_8_b);  
-    void *var_35 = tensorReluCPU(var_34); 
-    void *var_37 = tensorConvolutionCPU(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-    void* var_38 = tensorAddCPU(var_37, conv2d_9_b); 
-    void* var_40 = tensorConvolutionCPU(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); 
-    void *var_41 = tensorAddCPU(var_40, conv2d_10_b); 
-    void* var_42 = tensorAddCPU(var_41, var_38); 
-    void* var_43 = tensorReluCPU(var_42); 
-    void* var_45 = tensorConvolutionCPU(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-    void* var_46 = tensorAddCPU(var_45, conv2d_11_b); 
-    void *var_47 = tensorReluCPU(var_46); 
-    void* var_49 = tensorConvolutionCPU(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-    void* var_50 = tensorAddCPU(var_49, conv2d_12_b); 
-    void* var_51 = tensorAddCPU(var_43, var_50); 
-    void* var_52 = tensorReluCPU(var_51); 
-    void* var_54 = tensorConvolutionCPU(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-    void* var_55 = tensorAddCPU(var_54, conv2d_13_b); 
-    void* var_56 = tensorReluCPU(var_55); 
-    void* var_58 = tensorConvolutionCPU(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); 
-    void* var_59 = tensorAddCPU(var_58, conv2d_14_b); 
-    void* var_60 = tensorAddCPU(var_52, var_59); 
-    void* var_61 = tensorReluCPU(var_60); 
-    void* var_63 = tensorConvolutionCPU(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); 
-    void* var_64 = tensorAddCPU(var_63, conv2d_15_b); 
-    void* var_65 = tensorReluCPU(var_64); 
-    void* var_67 = tensorConvolutionCPU(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); 
-    void* var_68 = tensorAddCPU(var_67, conv2d_16_b); 
-    void* var_70 = tensorConvolutionCPU(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
-    void* var_71 = tensorAddCPU(var_70, conv2d_17_b); 
-    void* var_72 = tensorAddCPU(var_71, var_68); 
-    void* var_73 = tensorReluCPU(var_72); 
-    void* var_75 = tensorConvolutionCPU(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); 
-    void* var_76 = tensorAddCPU(var_75, conv2d_18_b); 
-    void* var_77 = tensorReluCPU(var_76); 
-    void* var_79 = tensorConvolutionCPU(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); 
-    void* var_80 = tensorAddCPU(var_79, conv2d_19_b); 
-    void* var_81 = tensorAddCPU(var_73, var_80); 
-    void* var_82 = tensorReluCPU(var_81); 
-    void* var_84 = tensorConvolutionCPU(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); 
-    void* var_85 = tensorAddCPU(var_84, conv2d_20_b); 
-    void* var_86 = tensorReluCPU(var_85); 
-    void* var_88 = tensorConvolutionCPU(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); 
-    void* var_89 = tensorAddCPU(var_88, conv2d_21_b); 
-    void* var_90 = tensorAddCPU(var_82, var_89); 
-    void* var_91 = tensorReluCPU(var_90); 
-    void* var_92 = tensorPoolingCPU(var_91,1,8,8,0,0,8,8); 
-    void* var_94 = tensorGemmCPU(var_92, dense_1_w);
-    void* var_95 = tensorAddCPU(var_94, dense_1_b); 
-    void* var_96 = tensorSoftmaxCPU(var_95);
-
-    float accuracy = computeAccuracy2(labels,batch_size, var_96); 
-    final_accuracy += accuracy;
-  }
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc
deleted file mode 100644
index 00e259079058f1be5163bd43d9982e07b82f1001..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/depthwise_test_8/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string depthwise_conv2d_1_b_path =  dir_prefix + std::string("depthwise_conv2d_1_b.bin"); 
-  void* depthwise_conv2d_1_b =  readTrainedWeights(depthwise_conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,6272,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 10000; 
-  int batch_size = 10000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
-    void* var_4 = tensorConvolution(var_3, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_5 = tensorAdd(var_4, depthwise_conv2d_1_b); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_8 = tensorGemmGPU(var_6, dense_1_w); 
-    void* var_9 = tensorAdd(var_8, dense_1_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_11 = tensorGemmGPU(var_10, dense_2_w); 
-    void* var_12 = tensorAdd(var_11, dense_2_b); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_14 = tensorSoftmax(var_13); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_14); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp
deleted file mode 100644
index 1596a157f4175b0462e0b762c643542cc05cf337..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <algorithm>
-#include <cassert>
-#include <fstream>
-#include <string>
-
-const size_t n_channels = 3;
-
-Tensor *gaussianFilter_(float div) {
-  std::vector<float> gauss_data = {1,  4, 6,  4,  1,  4, 16, 24, 16,
-                                   4,  6, 24, 36, 24, 6, 4,  16, 24,
-                                   16, 4, 1,  4,  6,  4, 1};
-  for (float &f : gauss_data)
-    f /= div;
-  return (Tensor *)createFilterFromData(
-      CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1);
-}
-
-Tensor *gaussianFilter() { return gaussianFilter_(16.0); }
-
-void *normalize(size_t &id, void *image) {
-  auto *max_1D = wrapper_tensorReduce(
-      std::to_string(id++).c_str(), image, 2, (int)MathOp::Max);
-  auto *max = wrapper_tensorReduce(
-      std::to_string(id++).c_str(), max_1D, 3, (int)MathOp::Max);
-  auto *img_norm = wrapper_tensorMap2(
-      std::to_string(id++).c_str(), (int)MathOp::Div, image, max);
-  freeTensor(max_1D);
-  freeTensor(max);
-  return img_norm;
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-void *sharpen(size_t &id, void *image) {
-  void *gaussian = gaussianFilter();
-  forward_reshape(image);
-  void *blurred = wrapper_ConvLayer(
-      std::to_string(id++).c_str(), image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0);
-  backward_reshape(image);
-  backward_reshape(blurred);
-  void *blurred_norm = normalize(id, blurred);
-  void *image_norm = normalize(id, image);
-  void *ret = wrapper_tensorMap2(
-      std::to_string(id++).c_str(), (int)MathOp::AddWeighted, blurred_norm,
-      image_norm);
-  freeTensor(gaussian);
-  freeTensor(blurred);
-  freeTensor(blurred_norm);
-  freeTensor(image_norm);
-  return ret;
-}
-
-void *main_procedure(void *fg, void *bg) {
-  size_t id = 1;
-  void *g_bg = sharpen(id, bg);
-  void *g_fg = sharpen(id, fg);
-  void *ret = wrapper_tensorMap2(
-      std::to_string(id++).c_str(), (int)MathOp::Blend2, g_bg, g_fg);
-  freeTensor(g_bg);
-  freeTensor(g_fg);
-  return ret;
-}
-
-const size_t batch_size = 250, total_max = 5000;
-const float psnr_threshold = 25.0;
-
-int main() {
-  const char *input1_path = "../model_params/image_processing_5k";
-  const char *input2_path = "../model_params/image_processing_5k_shuffled";
-  const char *ref_output_path = "../model_params/blending_ref_output";
-
-  llvm_hpvm_initTensorRt(0);
-  llvm_hpvm_initializeRuntimeController("tuner_confs.txt", "");
-  startMemTracking();
-  size_t bstart = 0;
-  while (true) {
-    auto *background = readDataSet(input1_path, bstart, batch_size, n_channels),
-         *foreground = readDataSet(input2_path, bstart, batch_size, n_channels);
-    if (!background || !foreground)
-      break;
-
-    auto *result = main_procedure(foreground, background);
-    llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size);
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp
deleted file mode 100644
index 7eb80568596fdc6d1c17e55a177e022f3c3a284a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <vector>
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-std::pair<Tensor *, Tensor *> getSobelKernels() {
-  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-  auto *t1 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-  auto *t2 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-  return std::make_pair(t1, t2);
-}
-
-void *main_procedure(void *dataset) {
-  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
-  Tensor *kernel_x, *kernel_y;
-  std::tie(kernel_x, kernel_y) = getSobelKernels();
-
-  // 0. Grayscale
-  auto *summed_image = wrapper_tensorReduce("1", dataset, 1, (int)MathOp::Add);
-  auto *grayscale_image =
-      wrapper_tensorMap1("2", (int)MathOp::Avg3, summed_image);
-  // 1. Denoise
-  auto *image2 = wrapper_ConvLayer(
-      "3", grayscale_image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0);
-  // 2. Get edge gradient / direction
-  auto *grad_x = wrapper_ConvLayer(
-      "4", image2, kernel_x, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
-  auto *grad_y = wrapper_ConvLayer(
-      "5", image2, kernel_y, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
-  auto *grad_mag = wrapper_tensorMap2("6", (int)MathOp::Hypot, grad_x, grad_y);
-  // 2.5. Normalize grad magnitude
-  auto *grad_max_1D = wrapper_tensorReduce("7", grad_mag, 2, (int)MathOp::Max);
-  auto *grad_max = wrapper_tensorReduce("8", grad_max_1D, 3, (int)MathOp::Max);
-  auto *grad_mag_norm =
-      wrapper_tensorMap2("9", (int)MathOp::Div, grad_mag, grad_max);
-  return grad_mag_norm;
-}
-
-const size_t n_channels = 3;
-const size_t batch_size = 250, total_max = 5000;
-const float psnr_threshold = 25.0;
-
-int main(int argc, char *argv[]) {
-  const char *input_path = "../model_params/image_processing_5k";
-  const char *ref_output_path = "../model_params/canny_ref_output";
-  llvm_hpvm_initTensorRt(0);
-  llvm_hpvm_initializeRuntimeController("tuner_confs.txt", "");
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input_path, bstart, batch_size);
-    if (batch == nullptr)
-      break;
-
-    auto *result = main_procedure(batch);
-    llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size);
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp
deleted file mode 100644
index f925f98712b0016ba300c588803c75dd3a364ddc..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <cmath>
-#include <iostream>
-#include <thrust/complex.h>
-#include <vector>
-
-const size_t batch_size = 250, total_max = 5000;
-const size_t half_size = 64, n_colors = N_RGB_CHAN;
-const float psnr_threshold = 25.0;
-
-Tensor *sharpeningFilter(float sigma, size_t w, size_t h) {
-  static const float gaussian_rate = -0.5;
-
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      float val = exp(exponent);
-      data[idx] = val;
-      sum += data[idx];
-    }
-  for (size_t i = 0; i < w * h; i++)
-    data[i] *= gaussian_rate / sum;
-  size_t center_idx = m * h + n;
-  data[center_idx] += (1 - gaussian_rate);
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, 1);
-}
-
-Tensor *outliningFilter() {
-  float data[9] = {-1, -1, -1, -1, 8, -1, -1, -1, -1};
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, 3, 3, 1);
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-void *main_procedure(void *dataset, void *outline, void *sharpen) {
-  forward_reshape(dataset);
-  auto *sharpened1 = wrapper_ConvLayer(
-      "1", dataset, sharpen, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
-  auto *sharpened2 = wrapper_ConvLayer(
-      "2", sharpened1, sharpen, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
-  auto *outlined = wrapper_ConvLayer(
-      "3", sharpened2, outline, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0);
-  backward_reshape(outlined);
-  return outlined;
-}
-
-int main(int argc, char *argv[]) {
-  const char *input1_path = "../model_params/image_processing_5k_128_128";
-  const char *ref_output_path = "../model_params/fft_ref_output";
-  llvm_hpvm_initTensorRt(0);
-  llvm_hpvm_initializeRuntimeController("tuner_confs.txt", "");
-  Tensor *outline = outliningFilter(), *sharpen = sharpeningFilter(1.5, 3, 3);
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-    if (batch == nullptr) // If end of dataset
-      break;
-    auto *result = main_procedure(batch, outline, sharpen);
-    llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size);
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc
deleted file mode 100644
index 44c03aab875a6de4af6c87776241295cd1fd673b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include "../../include/types.h"
-
-
-bool Opentuner_run = false;
-
-
-void test_2_Layer_clipped_FC(){
-
-  int total_runs = 1;
-
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-				   float_type, test_batch_size, 1, 28, 28);  
-  void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
-					 float_type, 1, 1, 784, 128);  
-  void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
-				      float_type, 1, 128, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
-					 float_type, 1, 1, 128, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);
-
-
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start execution profiling Tensor ops
-    startProfiling();
-    
-    // Layer-1
-    void* fc1out = tensorHalfGemm(input, fc1_weights);  
-  
-    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
-  
-    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
-  
-    // Layer-2
-    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);  
-  
-    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
-  
-    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
-  
-    void* result = tensorSoftmax(fc2_relu);
-
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();
-   
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-  
-}
-
-
-// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-  
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  test_2_Layer_clipped_FC();
-
-  llvm_hpvm_cleanupTensorRt();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc
deleted file mode 100644
index 697fea9b8aa61a8c3cf5ec3e8d0d66466df9b1e8..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include "../../include/types.h"
-
-
-
-
-bool Opentuner_run = false;
-
-
-void test_3_Layer_clipped_FC(){
-
-
-  int total_runs = 1000;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* 3-Layer FC with clipped activations and weights ********* \n");
-
-  int test_batch_size = 5000;
-
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-				   float_type, test_batch_size, 1, 28, 28);  
-  void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin",
-					 float_type, 1, 1, 784, 256);  
-  void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin",
-				      float_type, 1, 256, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin",
-					 float_type, 1, 1, 256, 128);  
-  void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin",
-				      float_type, 1, 128, 1, 1);  
-  void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin",
-					 float_type, 1, 1, 128, 10);  
-  void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-    // Start execution profiling Tensor ops
-    startProfiling();
-
-    
-    // Layer-1
-    void* fc1out = tensorHalfGemm(input, fc1_weights);  
-  
-    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
- 
-    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
- 
-    // Layer-2
-    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);  
-  
-    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
- 
-    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
- 
-    // Layer-3
-    void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights);  
-  
-    void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias);
- 
-    void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2);
-  
-    void* result = tensorSoftmax(fc3_relu);
-
-    
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();
-   
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-
-  }
-  
-  
-}
-
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  test_3_Layer_clipped_FC();
-
-  llvm_hpvm_cleanupTensorRt();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc
deleted file mode 100644
index ad999165cfd4148479de58e24fed8291161da491..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include "../../include/types.h"
-
-
-bool Opentuner_run = false;
-
-
-void test_4_Layer_clipped_FC(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-  
-  printf("********* 3-Layer FC with clipped activations and weights ********* \n");
-
-  int test_batch_size = 5000;
-  
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-				   float_type, test_batch_size, 1, 28, 28);  
-  void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin",
-					 float_type, 1, 1, 784, 512);  
-  void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin",
-				      float_type, 1, 512, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin",
-					 float_type, 1, 1, 512, 256);  
-  void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin",
-				      float_type, 1, 256, 1, 1);  
-  void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin",
-					 float_type, 1, 1, 256, 128);  
-  void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin",
-				      float_type, 1, 128, 1, 1);
-  void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin",
-					 float_type, 1, 1, 128, 10);  
-  void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start execution profiling Tensor ops
-    startProfiling();
-  
-    // Layer-1
-    void* fc1out = tensorHalfGemm(input, fc1_weights);  
-
-    void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias);
-    
-    void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2);
-    
-    // Layer-2
-    void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights);  
-  
-    void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias);
-  
-    void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2);
-  
-    // Layer-3
-    void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights);  
-  
-    void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias);
-  
-    void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2);
-  
-    // Layer-4
-    void* fc4out = tensorHalfGemm(fc3_relu, fc4_weights);  
-  
-    void* fc4_bias_out = tensorHalfAdd(fc4out, fc4_bias);
-  
-    void* fc4_relu = tensorHalfRelu2(fc4_bias_out, 0, 2); 
-  
-    void* result = tensorSoftmax(fc4_relu);
-
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-
-  
-}
-
-
-
-int main(int argc, char* argv[]){
- 
-  if(argc > 1)
-    Opentuner_run = true;
-  
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  test_4_Layer_clipped_FC();
-
-  llvm_hpvm_cleanupTensorRt();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc
deleted file mode 100644
index bb45b14d62e061e704b252aa44e602e0c1d08ba7..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include "../../include/types.h"
-
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-
-    // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-    tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv1_tanh = tensorHalfTanh(pool1out);
-
-    // NOTE: input channels have to match between tensor op inputs and outputs 
-    void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv2_tanh = tensorHalfTanh(pool2out);
-
-    void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights);  
-
-    void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorHalfTanh(gemm1biasout);
-  
-    void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorHalfTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc
deleted file mode 100644
index 23fd15576ace419976a2b4d7f8191079a59c8c31..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  startProfiling();
-  
-  printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  //long int test_batch_size = 9145;
-  //long int test_batch_size = 4572;
-  long int test_batch_size = 2000;
-  
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-				   float_type,
-				   test_batch_size, 1, H, W);
-  
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin",
-					   float_type,
-					   test_batch_size, 1, H, W);
-
-  
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255);
-
-    void* motionblur_out = tensorHalfConvolution(emboss_bias_out_clip, motionblur_filter, 4, 4, 1, 1,
-                                       conv_mode, conv_precision);
-    void * motionblur_out_clip = tensorHalfRelu2(motionblur_out, 0, 255);
-
-    void* outline_out = tensorHalfConvolution(motionblur_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * result = tensorHalfRelu2(outline_out, 0, 255);
-
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEMO_calib.bin");
-    
-    
-    computePSNRViolation(result, golden_output, 30);
-
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  stopProfiling(); 
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc
deleted file mode 100644
index 07875553d59a3635c21db8975db9e8986d1bc6c9..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-
-  startProfiling();
-  
-  printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n");
-
-  long int test_batch_size = 2000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W); 
-  
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  
-  for(int i = 0; i < total_runs; i++){
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255);
-
-    void* outline_out = tensorHalfConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * outline_out_clip = tensorHalfRelu2(outline_out, 0, 255);
-
-    void* motionblur_out = tensorHalfConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1,
-                                       conv_mode, conv_precision);
-    void * result = tensorHalfRelu2(motionblur_out, 0, 255);
-
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEOM_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-    
-  }
-
-  stopProfiling();  
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc
deleted file mode 100644
index 369d8ff4f76aaff4663532ee634a79d54b94b2aa..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  
-  printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n");
-
-  startProfiling();
-    
-  long int test_batch_size = 2000; 
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin",
-					   float_type,
-					   test_batch_size, 1, H, W);
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  
-  for(int i = 0; i < total_runs; i++){
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255);
-
-    void* outline_out = tensorHalfConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * result = tensorHalfRelu2(outline_out, 0, 255);
-
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEO_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-    
-  }
-
-  stopProfiling();
-}
-
-
-int main(int argc, char* argv[]){
-  
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc
deleted file mode 100644
index e2d2ff18090c085405ec94902696e1a6631d94a7..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-/* Profiling reference for the half-precision image pipeline
- * Gaussian -> Sharpen -> Motion Blur -> Emboss.
- * startProfiling() is invoked before the inputs and filters are read and
- * stopProfiling() after the last run. */
-void testPipeline(){
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n");
-
-  startProfiling();
-
-  const int num_runs = 1;
-
-  // A batch size of 4572 was used previously (kept here for reference).
-  const long int num_images = 2000;
-  const long int img_height = 240;
-  const long int img_width = 300;
-
-  printf("Reading input\n");
-  void* images = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                    float_type,
-                                    num_images, 1, img_height, img_width);
-
-  void* gaussian_w = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                        float_type, 1, 1, 9, 9);
-  // The outline filter is read but no stage of this pipeline consumes it.
-  void* outline_w = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                       float_type, 1, 1, 3, 3);
-  void* sharpen_w = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                       float_type, 1, 1, 3, 3);
-  void* motionblur_w = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                          float_type, 1, 1, 9, 9);
-  void* emboss_w = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                      float_type, 1, 1, 5, 5);
-  void* emboss_b = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                      float_type, 1, 1, 1, 1);
-
-  // 1 selects CROSS_CORRELATION according to the original note.
-  const int mode = 1;
-  // 0 was documented as "Float" compute precision; the Half entry points are
-  // used regardless. FIXIT: use enum
-  const int precision = 0;
-
-  int run = 0;
-  while(run < num_runs){
-
-    // 'SAME' convolutions: the numeric arguments after each filter are
-    // presumably (pad_h, pad_w, stride_h, stride_w) -- TODO confirm against
-    // tensor_runtime.h. Every stage is clipped to [0, 255].
-    void* blurred = tensorHalfConvolution(images, gaussian_w, 4, 4, 1, 1,
-                                          mode, precision);
-    void* blurred_clip = tensorHalfRelu2(blurred, 0, 255);
-
-    void* sharpened = tensorHalfConvolution(blurred_clip, sharpen_w, 1, 1, 1, 1,
-                                            mode, precision);
-    void* sharpened_clip = tensorHalfRelu2(sharpened, 0, 255);
-
-    void* smeared = tensorHalfConvolution(sharpened_clip, motionblur_w, 4, 4, 1, 1,
-                                          mode, precision);
-    void* smeared_clip = tensorHalfRelu2(smeared, 0, 255);
-
-    void* embossed = tensorHalfConvolution(smeared_clip, emboss_w, 2, 2, 1, 1,
-                                           mode, precision);
-    void* embossed_biased = tensorHalfAdd(embossed, emboss_b);
-    void* result = tensorHalfRelu2(embossed_biased, 0, 255);
-
-    // Output validation is disabled in this variant. To re-enable it, call
-    // hpvm_request_tensor(result, 0) first, then dumpOutput(result, "GSME_calib.bin")
-    // and/or computePSNRViolation(result, golden_output, 30).
-
-    ++run;
-  }
-
-  stopProfiling();
-}
-
-
-// Program entry: initialize the tensor runtime, run the GSME pipeline
-// benchmark, then release the runtime. Command-line arguments are ignored.
-int main(int argc, char* argv[]){
-  (void) argc;
-  (void) argv;
-
-  llvm_hpvm_initTensorRt(0);
-  testPipeline();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc
deleted file mode 100644
index 915dc596252576fb39dca073793d618d21634509..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-
-/* Profiling reference for the half-precision image pipeline
- * Gaussian -> Sharpen -> Motion Blur.
- * startProfiling() is invoked before the inputs and filters are read and
- * stopProfiling() after the last run. */
-void testPipeline(){
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n");
-
-  startProfiling();
-
-  const int num_runs = 1;
-
-  // A batch size of 9145 was used previously (kept here for reference).
-  const long int num_images = 2000;
-  const long int img_height = 240;
-  const long int img_width = 300;
-
-  printf("Reading input\n");
-  void* images = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                    float_type,
-                                    num_images, 1, img_height, img_width);
-
-  void* gaussian_w = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                        float_type, 1, 1, 9, 9);
-  // The outline and emboss tensors are read but no stage of this pipeline
-  // consumes them.
-  void* outline_w = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                       float_type, 1, 1, 3, 3);
-  void* sharpen_w = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                       float_type, 1, 1, 3, 3);
-  void* motionblur_w = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                          float_type, 1, 1, 9, 9);
-  void* emboss_w = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                      float_type, 1, 1, 5, 5);
-  void* emboss_b = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                      float_type, 1, 1, 1, 1);
-
-  // 1 selects CROSS_CORRELATION according to the original note.
-  const int mode = 1;
-  // 0 was documented as "Float" compute precision; the Half entry points are
-  // used regardless. FIXIT: use enum
-  const int precision = 0;
-
-  int run = 0;
-  while(run < num_runs){
-
-    // 'SAME' convolutions: the numeric arguments after each filter are
-    // presumably (pad_h, pad_w, stride_h, stride_w) -- TODO confirm against
-    // tensor_runtime.h. Every stage is clipped to [0, 255].
-    void* blurred = tensorHalfConvolution(images, gaussian_w, 4, 4, 1, 1,
-                                          mode, precision);
-    void* blurred_clip = tensorHalfRelu2(blurred, 0, 255);
-
-    void* sharpened = tensorHalfConvolution(blurred_clip, sharpen_w, 1, 1, 1, 1,
-                                            mode, precision);
-    void* sharpened_clip = tensorHalfRelu2(sharpened, 0, 255);
-
-    void* smeared = tensorHalfConvolution(sharpened_clip, motionblur_w, 4, 4, 1, 1,
-                                          mode, precision);
-    void* result = tensorHalfRelu2(smeared, 0, 255);
-
-    // Output validation is disabled in this variant. To re-enable it, call
-    // hpvm_request_tensor(result, 0) first, then dumpOutput(result, "GSM_calib.bin")
-    // and/or computePSNRViolation(result, golden_output, 30).
-
-    ++run;
-  }
-
-  stopProfiling();
-}
-
-
-// Program entry: initialize the tensor runtime, run the GSM pipeline
-// benchmark, then release the runtime. Command-line arguments are ignored.
-int main(int argc, char* argv[]){
-  (void) argc;
-  (void) argv;
-
-  llvm_hpvm_initTensorRt(0);
-  testPipeline();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc
deleted file mode 100644
index 575f9b164f865afe268a4692ee6c4fd88b6a45c6..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-// True when the benchmark is driven by OpenTuner through a named pipe;
-// main() sets it when any command-line argument is given.
-bool Opentuner_run = false;
-
-// Named pipe shared with the tuner process.
-static const char* const kOpentunerFifoPath = "/tmp/myfifo";
-
-// Number of bytes read from / written to the pipe per message.
-static const size_t kOpentunerMsgBytes = 80;
-
-
-// Opens the pipe for reading and consumes one message from the tuner.
-// Aborts if the pipe cannot be opened or if the message is "stop_run".
-static void opentunerWaitForRun(){
-
-  int fd = open(kOpentunerFifoPath, O_RDONLY);
-  if(fd == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // Zero-initialized and larger than the read size, so the text handed to
-  // strcmp() is always NUL-terminated (the buffer used to be uninitialized).
-  char msg[100] = {0};
-  if(read(fd, msg, kOpentunerMsgBytes) < 0){
-    // A failed read is treated as "no stop requested", as before.
-    msg[0] = '\0';
-  }
-
-  if(strcmp(msg, "stop_run") == 0){
-    abort();
-  }
-
-  close(fd);
-}
-
-
-// Opens the pipe for writing and reports completion of one run to the tuner.
-// Aborts if the pipe cannot be opened.
-static void opentunerSignalDone(){
-
-  int fd_out = open(kOpentunerFifoPath, O_WRONLY);
-  if(fd_out == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // The message is still 80 bytes on the wire, but now comes from a
-  // zero-padded buffer; the previous code wrote 80 bytes starting at a
-  // 16-byte string literal and therefore read past its end.
-  char msg[kOpentunerMsgBytes] = "completed***!\n";
-  write(fd_out, msg, sizeof(msg));
-  close(fd_out);
-}
-
-
-// Runs the 2-layer fully-connected network (784 -> 128 -> 10) with activations
-// clipped to [0, 2] on the test batch and reports accuracy after every run.
-// In OpenTuner mode each run is bracketed by a handshake over the named pipe.
-void test_2_Layer_clipped_FC(){
-
-  int total_runs = 10;
-
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-
-  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
-
-  // Number of images (and labels) evaluated per run.
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-                                   float_type, test_batch_size, 1, 28, 28);
-  void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
-                                         float_type, 1, 1, 784, 128);
-  void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
-                                      float_type, 1, 128, 1, 1);
-  void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
-                                         float_type, 1, 1, 128, 10);
-  void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
-                                      float_type, 1, 10, 1, 1);
-
-
-  clearTensorMap();
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-      opentunerWaitForRun();
-    }
-
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start execution profiling Tensor ops
-    startProfiling();
-
-    // Layer-1
-    void* fc1out = tensorGemmGPU(input, fc1_weights);
-    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
-
-    // Layer-2
-    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
-    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
-
-    void* result = tensorSoftmax(fc2_relu);
-
-    stopProfiling();
-
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();
-
-    if(Opentuner_run){
-      opentunerSignalDone();
-    }
-  }
-
-}
-
-
-// Any command-line argument switches the run into OpenTuner mode, in which
-// the benchmark waits on a pipe between runs.
-int main(int argc, char* argv[]){
-  (void) argv;
-
-  Opentuner_run = (argc > 1);
-
-  // The runtime has to be initialized before any other runtime call
-  llvm_hpvm_initTensorRt(0);
-  test_2_Layer_clipped_FC();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc
deleted file mode 100644
index 41343afa0484d022758ee690e69f38221c9ece10..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_cpu_runtime.h"
-#include "../include/utils_cpu.h"
-#include "../include/types.h"
-
-
-// Evaluates the 2-layer fully-connected network (784 -> 128 -> 10) with the
-// CPU tensor routines on a batch of 100 images and reports its accuracy.
-void FC2(){
-
-  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
-
-  const int num_images = 100;
-
-  uint8_t* expected = readLabels("./model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", num_images);
-
-  void* images = readTrainedWeightsCPU("./model_params/FC_network2/mnist_float_input.bin",
-                                       float_type, num_images, 1, 28, 28);
-
-  void* w1 = readTrainedWeightsCPU("./model_params/fc2_clipped/fc1.bin",
-                                   float_type, 1, 1, 784, 128);
-  void* b1 = readTrainedWeightsCPU("./model_params/fc2_clipped/fc1_bias.bin",
-                                   float_type, 1, 128, 1, 1);
-  void* w2 = readTrainedWeightsCPU("./model_params/fc2_clipped/fc2.bin",
-                                   float_type, 1, 1, 128, 10);
-  void* b2 = readTrainedWeightsCPU("./model_params/fc2_clipped/fc2_bias.bin",
-                                   float_type, 1, 10, 1, 1);
-
-  // First layer: matrix multiply, bias add, activation clipped to [0, 2]
-  void* hidden = tensorGemmCPU(images, w1);
-  hidden = tensorAddCPU(hidden, b1);
-  hidden = tensorRelu2CPU(hidden, 0, 2);
-
-  // Second layer: same three steps on the hidden activations
-  void* logits = tensorGemmCPU(hidden, w2);
-  logits = tensorAddCPU(logits, b2);
-  logits = tensorRelu2CPU(logits, 0, 2);
-
-  void* probabilities = tensorSoftmaxCPU(logits);
-
-  computeAccuracy2(expected, num_images, probabilities);
-
-}
-
-
-// Program entry: initialize the runtime and evaluate the 2-layer network once.
-// Command-line arguments are ignored.
-int main(int argc, char* argv[]){
-  (void) argc;
-  (void) argv;
-
-  llvm_hpvm_initTensorRt(0);
-  FC2();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc
deleted file mode 100644
index f566fd98a717698966c258377c32eda9ee30739d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-
-
-// True when the benchmark is driven by OpenTuner through a named pipe;
-// main() sets it when any command-line argument is given.
-bool Opentuner_run = false;
-
-// Named pipe shared with the tuner process.
-static const char* const kOpentunerFifoPath = "/tmp/myfifo";
-
-// Number of bytes read from / written to the pipe per message.
-static const size_t kOpentunerMsgBytes = 80;
-
-
-// Opens the pipe for reading and consumes one message from the tuner.
-// Aborts if the pipe cannot be opened or if the message is "stop_run".
-static void opentunerWaitForRun(){
-
-  int fd = open(kOpentunerFifoPath, O_RDONLY);
-  if(fd == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // Zero-initialized and larger than the read size, so the text handed to
-  // strcmp() is always NUL-terminated (the buffer used to be uninitialized).
-  char msg[100] = {0};
-  if(read(fd, msg, kOpentunerMsgBytes) < 0){
-    // A failed read is treated as "no stop requested", as before.
-    msg[0] = '\0';
-  }
-
-  if(strcmp(msg, "stop_run") == 0){
-    abort();
-  }
-
-  close(fd);
-}
-
-
-// Opens the pipe for writing and reports completion of one run to the tuner.
-// Aborts if the pipe cannot be opened.
-static void opentunerSignalDone(){
-
-  int fd_out = open(kOpentunerFifoPath, O_WRONLY);
-  if(fd_out == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // The message is still 80 bytes on the wire, but now comes from a
-  // zero-padded buffer; the previous code wrote 80 bytes starting at a
-  // 16-byte string literal and therefore read past its end.
-  char msg[kOpentunerMsgBytes] = "completed***!\n";
-  write(fd_out, msg, sizeof(msg));
-  close(fd_out);
-}
-
-
-// Runs the 3-layer fully-connected network (784 -> 256 -> 128 -> 10) with
-// activations clipped to [0, 2] on the test batch and reports accuracy after
-// every run. In OpenTuner mode each run is bracketed by a handshake over the
-// named pipe.
-void test_3_Layer_clipped_FC(){
-
-  int total_runs = 10000;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-
-  printf("********* 3-Layer FC with clipped activations and weights ********* \n");
-
-  // Number of images (and labels) evaluated per run.
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-                                   float_type, test_batch_size, 1, 28, 28);
-  void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin",
-                                         float_type, 1, 1, 784, 256);
-  void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin",
-                                      float_type, 1, 256, 1, 1);
-  void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin",
-                                         float_type, 1, 1, 256, 128);
-  void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin",
-                                      float_type, 1, 128, 1, 1);
-  void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin",
-                                         float_type, 1, 1, 128, 10);
-  void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin",
-                                      float_type, 1, 10, 1, 1);
-
-
-  clearTensorMap();
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-      opentunerWaitForRun();
-    }
-
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start execution profiling Tensor ops
-    startProfiling();
-
-    // Layer-1
-    void* fc1out = tensorGemmGPU(input, fc1_weights);
-    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
-
-    // Layer-2
-    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
-    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
-
-    // Layer-3
-    void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);
-    void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
-    void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2);
-
-    void* result = tensorSoftmax(fc3_relu);
-
-    stopProfiling();
-
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();
-
-    if(Opentuner_run){
-      opentunerSignalDone();
-    }
-
-  }
-
-}
-
-
-
-// Any command-line argument switches the run into OpenTuner mode.
-int main(int argc, char* argv[]){
-  (void) argv;
-
-  Opentuner_run = (argc > 1);
-
-  // The runtime has to be initialized before any other runtime call
-  llvm_hpvm_initTensorRt(0);
-  test_3_Layer_clipped_FC();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc
deleted file mode 100644
index 24a4d888124c43e98c7b78a33c3b5eb29250808d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-// True when the benchmark is driven by OpenTuner through a named pipe;
-// main() sets it when any command-line argument is given.
-bool Opentuner_run = false;
-
-// Named pipe shared with the tuner process.
-static const char* const kOpentunerFifoPath = "/tmp/myfifo";
-
-// Number of bytes read from / written to the pipe per message.
-static const size_t kOpentunerMsgBytes = 80;
-
-
-// Opens the pipe for reading and consumes one message from the tuner.
-// Aborts if the pipe cannot be opened or if the message is "stop_run".
-static void opentunerWaitForRun(){
-
-  int fd = open(kOpentunerFifoPath, O_RDONLY);
-  if(fd == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // Zero-initialized and larger than the read size, so the text handed to
-  // strcmp() is always NUL-terminated (the buffer used to be uninitialized).
-  char msg[100] = {0};
-  if(read(fd, msg, kOpentunerMsgBytes) < 0){
-    // A failed read is treated as "no stop requested", as before.
-    msg[0] = '\0';
-  }
-
-  if(strcmp(msg, "stop_run") == 0){
-    abort();
-  }
-
-  close(fd);
-}
-
-
-// Opens the pipe for writing and reports completion of one run to the tuner.
-// Aborts if the pipe cannot be opened.
-static void opentunerSignalDone(){
-
-  int fd_out = open(kOpentunerFifoPath, O_WRONLY);
-  if(fd_out == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // The message is still 80 bytes on the wire, but now comes from a
-  // zero-padded buffer; the previous code wrote 80 bytes starting at a
-  // 16-byte string literal and therefore read past its end.
-  char msg[kOpentunerMsgBytes] = "completed***!\n";
-  write(fd_out, msg, sizeof(msg));
-  close(fd_out);
-}
-
-
-// Runs the 4-layer fully-connected network (784 -> 512 -> 256 -> 128 -> 10)
-// with activations clipped to [0, 2] on the test batch and reports accuracy
-// after every run. In OpenTuner mode each run is bracketed by a handshake over
-// the named pipe.
-void test_4_Layer_clipped_FC(){
-
-  int total_runs = 200;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  // The banner previously announced a 3-layer network.
-  printf("********* 4-Layer FC with clipped activations and weights ********* \n");
-
-  // Number of images (and labels) evaluated per run.
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-                                   float_type, test_batch_size, 1, 28, 28);
-  void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin",
-                                         float_type, 1, 1, 784, 512);
-  void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin",
-                                      float_type, 1, 512, 1, 1);
-  void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin",
-                                         float_type, 1, 1, 512, 256);
-  void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin",
-                                      float_type, 1, 256, 1, 1);
-  void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin",
-                                         float_type, 1, 1, 256, 128);
-  void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin",
-                                      float_type, 1, 128, 1, 1);
-  void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin",
-                                         float_type, 1, 1, 128, 10);
-  void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin",
-                                      float_type, 1, 10, 1, 1);
-
-
-  clearTensorMap();
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-      opentunerWaitForRun();
-    }
-
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start execution profiling Tensor ops
-    startProfiling();
-
-    // Layer-1
-    void* fc1out = tensorGemmGPU(input, fc1_weights);
-    void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-    void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
-
-    // Layer-2
-    void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);
-    void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-    void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
-
-    // Layer-3
-    void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);
-    void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
-    void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2);
-
-    // Layer-4
-    void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights);
-    void* fc4_bias_out = tensorAdd(fc4out, fc4_bias);
-    void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2);
-
-    void* result = tensorSoftmax(fc4_relu);
-
-    stopProfiling();
-
-    computeAccuracy2(labels, test_batch_size, result);
-
-    dumpAccuracyNorms();
-    freeOutputTensors();
-
-    if(Opentuner_run){
-      opentunerSignalDone();
-    }
-  }
-
-}
-
-
-
-// Any command-line argument switches the run into OpenTuner mode.
-int main(int argc, char* argv[]){
-  (void) argv;
-
-  Opentuner_run = (argc > 1);
-
-  // The runtime has to be initialized before any other runtime call
-  llvm_hpvm_initTensorRt(0);
-  test_4_Layer_clipped_FC();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc
deleted file mode 100644
index b7f6e1eb5256ebd0dbcf718d3e8e30f0d93ecbc5..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_cpu_runtime.h"
-#include "../include/utils_cpu.h"
-#include "../include/types.h"
-
-
-// True when the benchmark is driven by OpenTuner through a named pipe;
-// main() sets it when any command-line argument is given.
-bool Opentuner_run = false;
-
-// Named pipe shared with the tuner process.
-static const char* const kOpentunerFifoPath = "/tmp/myfifo";
-
-// Number of bytes read from / written to the pipe per message.
-static const size_t kOpentunerMsgBytes = 80;
-
-
-// Opens the pipe for reading and consumes one message from the tuner.
-// Aborts if the pipe cannot be opened or if the message is "stop_run".
-static void opentunerWaitForRun(){
-
-  int fd = open(kOpentunerFifoPath, O_RDONLY);
-  if(fd == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // Zero-initialized and larger than the read size, so the text handed to
-  // strcmp() is always NUL-terminated (the buffer used to be uninitialized).
-  char msg[100] = {0};
-  if(read(fd, msg, kOpentunerMsgBytes) < 0){
-    // A failed read is treated as "no stop requested", as before.
-    msg[0] = '\0';
-  }
-
-  if(strcmp(msg, "stop_run") == 0){
-    abort();
-  }
-
-  close(fd);
-}
-
-
-// Opens the pipe for writing and reports completion of one run to the tuner.
-// Aborts if the pipe cannot be opened.
-static void opentunerSignalDone(){
-
-  int fd_out = open(kOpentunerFifoPath, O_WRONLY);
-  if(fd_out == -1){
-    printf("Invalid descriptor \n");
-    abort();
-  }
-
-  // The message is still 80 bytes on the wire, but now comes from a
-  // zero-padded buffer; the previous code wrote 80 bytes starting at a
-  // 16-byte string literal and therefore read past its end.
-  char msg[kOpentunerMsgBytes] = "completed***!\n";
-  write(fd_out, msg, sizeof(msg));
-  close(fd_out);
-}
-
-
-// Runs the 4-layer fully-connected network (784 -> 512 -> 256 -> 128 -> 10)
-// with the CPU tensor routines, activations clipped to [0, 2], and reports
-// accuracy after every run. In OpenTuner mode each run is bracketed by a
-// handshake over the named pipe.
-void test_4_Layer_clipped_FC(){
-
-  int total_runs = 200;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  // The banner previously announced a 3-layer network.
-  printf("********* 4-Layer FC with clipped activations and weights ********* \n");
-
-  // Number of images (and labels) evaluated per run.
-  int test_batch_size = 500;
-
-  uint8_t* labels = readLabels("./model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  void* input = readTrainedWeightsCPU("./model_params/FC_network2/mnist_float_input.bin",
-                                      float_type, test_batch_size, 1, 28, 28);
-  void* fc1_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc1.bin",
-                                            float_type, 1, 1, 784, 512);
-  void* fc1_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc1_bias.bin",
-                                         float_type, 1, 512, 1, 1);
-  void* fc2_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc2.bin",
-                                            float_type, 1, 1, 512, 256);
-  void* fc2_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc2_bias.bin",
-                                         float_type, 1, 256, 1, 1);
-  void* fc3_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc3.bin",
-                                            float_type, 1, 1, 256, 128);
-  void* fc3_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc3_bias.bin",
-                                         float_type, 1, 128, 1, 1);
-  void* fc4_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc4.bin",
-                                            float_type, 1, 1, 128, 10);
-  void* fc4_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc4_bias.bin",
-                                         float_type, 1, 10, 1, 1);
-
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-      opentunerWaitForRun();
-    }
-
-    // Layer-1
-    void* fc1out = tensorGemmCPU(input, fc1_weights);
-    void* fc1_bias_out = tensorAddCPU(fc1out, fc1_bias);
-    void* fc1_relu = tensorRelu2CPU(fc1_bias_out, 0, 2);
-
-    // Layer-2
-    void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights);
-    void* fc2_bias_out = tensorAddCPU(fc2out, fc2_bias);
-    void* fc2_relu = tensorRelu2CPU(fc2_bias_out, 0, 2);
-
-    // Layer-3
-    void* fc3out = tensorGemmCPU(fc2_relu, fc3_weights);
-    void* fc3_bias_out = tensorAddCPU(fc3out, fc3_bias);
-    void* fc3_relu = tensorRelu2CPU(fc3_bias_out, 0, 2);
-
-    // Layer-4
-    void* fc4out = tensorGemmCPU(fc3_relu, fc4_weights);
-    void* fc4_bias_out = tensorAddCPU(fc4out, fc4_bias);
-    void* fc4_relu = tensorRelu2CPU(fc4_bias_out, 0, 2);
-
-    void* result = tensorSoftmaxCPU(fc4_relu);
-
-    computeAccuracy2(labels, test_batch_size, result);
-
-    if(Opentuner_run){
-      opentunerSignalDone();
-    }
-  }
-
-}
-
-
-
-// Any command-line argument switches the run into OpenTuner mode.
-int main(int argc, char* argv[]){
-  (void) argv;
-
-  Opentuner_run = (argc > 1);
-
-  // The runtime has to be initialized before any other runtime call
-  llvm_hpvm_initTensorRt(0);
-  test_4_Layer_clipped_FC();
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc
deleted file mode 100644
index 80a55dfd6b138ffa23b1ac9a7148025a491948ed..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc
+++ /dev/null
@@ -1,413 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 10000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_9 = tensorRelu(var_8); 
-    void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-    void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_16 = tensorRelu(var_15); 
-    void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-    void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_20 = tensorRelu(var_19); 
-    void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_23 = tensorRelu(var_22); 
-    void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-    void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_28 = tensorRelu(var_27); 
-    void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-    void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-    void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_48 = tensorConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-    void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-    void* var_50 = tensorRelu(var_49); 
-    void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-    void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-    void* var_53 = tensorRelu(var_52); 
-    void* var_55 = tensorConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-    void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-    void* var_57 = tensorRelu(var_56); 
-    void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-    void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-    void* var_60 = tensorRelu(var_59); 
-    void* var_63 = tensorConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-    void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-    void* var_65 = tensorRelu(var_64); 
-    void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-    void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-    void* var_68 = tensorRelu(var_67); 
-    void* var_70 = tensorConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-    void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-    void* var_72 = tensorRelu(var_71); 
-    void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-    void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-    void* var_75 = tensorRelu(var_74); 
-    void* var_77 = tensorConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-    void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-    void* var_79 = tensorRelu(var_78); 
-    void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-    void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-    void* var_82 = tensorRelu(var_81); 
-    void* var_85 = tensorConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-    void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-    void* var_87 = tensorRelu(var_86); 
-    void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-    void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-    void* var_90 = tensorRelu(var_89); 
-    void* var_92 = tensorConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-    void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-    void* var_94 = tensorRelu(var_93); 
-    void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-    void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-    void* var_97 = tensorRelu(var_96); 
-    void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); 
-    void* var_101 = tensorGemmGPU(var_99, dense_1_w); 
-    void* var_102 = tensorAdd(var_101, dense_1_b); 
-    void* var_103 = tensorSoftmax(var_102); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_103); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc
deleted file mode 100644
index 89aa451dc1a5ee7b532bd5375e2e71e520c1372e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc
+++ /dev/null
@@ -1,242 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 1;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  
-  llvm_hpvm_initTensorRt(0); 
-
-  //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/");
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_cifar10_shallow/"); 
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 1000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-
-
-  for(int j = 0; j < total_runs; j++){    
-    float final_accuracy = 0.0;    
-    for(int i = 0; i < batch_count; i++){ 
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_6 = tensorRelu(var_5); 
-      void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-      void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_9 = tensorRelu(var_8); 
-      void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_13 = tensorRelu(var_12); 
-      void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-      void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_16 = tensorRelu(var_15); 
-      void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_28 = tensorRelu(var_27); 
-      void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-      void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_31 = tensorRelu(var_30); 
-      void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_43 = tensorRelu(var_42); 
-      void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-      void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_46 = tensorRelu(var_45); 
-      void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); 
-      void* var_49 = tensorGemmGPU(var_47, dense_1_w); 
-      void* var_50 = tensorAdd(var_49, dense_1_b); 
-      void* var_51 = tensorSoftmax(var_50); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  dumpExecutionAccuracies();
-    
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc
deleted file mode 100644
index 8905a93edb825b36c9e301ad1e450428740b4cb1..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ // Runs the mobilenet_shallow network batch by batch through the tensor runtime and dumps the mean per-batch accuracy.
-
-  llvm_hpvm_initTensorRt(1); // Runtime start-up; meaning of the argument 1 is not visible here (presumably a device id — TODO confirm).
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); // Relative path: resolved against the process working directory.
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); // Trailing four ints presumably give the tensor dims (filters, channels, kH, kW) — TODO confirm against readTrainedWeights.
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); // Called after all weights are loaded and before the batch loop; presumably pairs with freeBatchMemory() below — TODO confirm.
-
-  int test_input_size = 10000; // Total number of test inputs evaluated.
-  int batch_size = 2000; 
-  int batch_count = test_input_size / batch_size; // Integer division: 10000 / 2000 = 5 batches.
-  float final_accuracy = 0.0; // Accumulates per-batch accuracy; divided by batch_count after the loop.
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; // Index range [start, end) passed to both readInputBatch and readLabelsBatch.
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); // 3,32,32 presumably channels x height x width per input — TODO confirm.
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); // Last argument (32) equals the first dim of depthwise_conv2d_1_w; presumably the group count — TODO confirm.
-    void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_9 = tensorRelu(var_8); 
-    void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-    void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_16 = tensorRelu(var_15); 
-    void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); 
-    void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_20 = tensorRelu(var_19); 
-    void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_23 = tensorRelu(var_22); 
-    void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-    void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_28 = tensorRelu(var_27); 
-    void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-    void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_40 = tensorPooling(var_38,1,2,2,0,0,2,2); // Pooling output feeds the dense (GEMM) layer directly.
-    void* var_42 = tensorGemmGPU(var_40, dense_1_w); // Dense layer: GEMM against dense_1_w, bias dense_1_b added on the next line.
-    void* var_43 = tensorAdd(var_42, dense_1_b); 
-    void* var_44 = tensorSoftmax(var_43); // Final network output passed to computeAccuracy2.
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); // Labels for the same [start, end) range as the input batch.
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_44); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); // Invoked once per iteration, after this batch's accuracy has been accumulated.
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; // Mean of the per-batch accuracies.
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); // Runtime tear-down, mirroring llvm_hpvm_initTensorRt at the top.
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc
deleted file mode 100644
index 6f180e67f12844d20d2782267323f8631b421431..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc
+++ /dev/null
@@ -1,927 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "tensor_runtime.h" 
-#include "utils.h" 
-
-
-
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-
-  std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet/"); 
-  std::string input_path =  dir_prefix + std::string("test_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,7,7); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,1,1); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,64,1,1); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,64,1,1); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,64,256,1,1); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,256,64,1,1); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,64,256,1,1); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,256,64,1,1); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,128,256,1,1); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,512,128,1,1); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,512,256,1,1); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,128,512,1,1); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,512,128,1,1); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,128,512,1,1); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,512,128,1,1); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_22_w_path =  dir_prefix + std::string("conv2d_22_w.bin"); 
-  void* conv2d_22_w =  readTrainedWeights(conv2d_22_w_path.c_str(), 0,128,512,1,1); 
-  std::string conv2d_22_b_path =  dir_prefix + std::string("conv2d_22_b.bin"); 
-  void* conv2d_22_b =  readTrainedWeights(conv2d_22_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_23_w_path =  dir_prefix + std::string("conv2d_23_w.bin"); 
-  void* conv2d_23_w =  readTrainedWeights(conv2d_23_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_23_b_path =  dir_prefix + std::string("conv2d_23_b.bin"); 
-  void* conv2d_23_b =  readTrainedWeights(conv2d_23_b_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_24_w_path =  dir_prefix + std::string("conv2d_24_w.bin"); 
-  void* conv2d_24_w =  readTrainedWeights(conv2d_24_w_path.c_str(), 0,512,128,1,1); 
-  std::string conv2d_24_b_path =  dir_prefix + std::string("conv2d_24_b.bin"); 
-  void* conv2d_24_b =  readTrainedWeights(conv2d_24_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_25_w_path =  dir_prefix + std::string("conv2d_25_w.bin"); 
-  void* conv2d_25_w =  readTrainedWeights(conv2d_25_w_path.c_str(), 0,256,512,1,1); 
-  std::string conv2d_25_b_path =  dir_prefix + std::string("conv2d_25_b.bin"); 
-  void* conv2d_25_b =  readTrainedWeights(conv2d_25_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_26_w_path =  dir_prefix + std::string("conv2d_26_w.bin"); 
-  void* conv2d_26_w =  readTrainedWeights(conv2d_26_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_26_b_path =  dir_prefix + std::string("conv2d_26_b.bin"); 
-  void* conv2d_26_b =  readTrainedWeights(conv2d_26_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_27_w_path =  dir_prefix + std::string("conv2d_27_w.bin"); 
-  void* conv2d_27_w =  readTrainedWeights(conv2d_27_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_27_b_path =  dir_prefix + std::string("conv2d_27_b.bin"); 
-  void* conv2d_27_b =  readTrainedWeights(conv2d_27_b_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_28_w_path =  dir_prefix + std::string("conv2d_28_w.bin"); 
-  void* conv2d_28_w =  readTrainedWeights(conv2d_28_w_path.c_str(), 0,1024,512,1,1); 
-  std::string conv2d_28_b_path =  dir_prefix + std::string("conv2d_28_b.bin"); 
-  void* conv2d_28_b =  readTrainedWeights(conv2d_28_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_28_gamma_path =  dir_prefix + std::string("batch_normalization_28_gamma.bin"); 
-  void* batch_normalization_28_gamma =  readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_28_beta_path =  dir_prefix + std::string("batch_normalization_28_beta.bin"); 
-  void* batch_normalization_28_beta =  readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_28_mean_path =  dir_prefix + std::string("batch_normalization_28_mean.bin"); 
-  void* batch_normalization_28_mean =  readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_28_variance_path =  dir_prefix + std::string("batch_normalization_28_variance.bin"); 
-  void* batch_normalization_28_variance =  readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_29_w_path =  dir_prefix + std::string("conv2d_29_w.bin"); 
-  void* conv2d_29_w =  readTrainedWeights(conv2d_29_w_path.c_str(), 0,256,1024,1,1); 
-  std::string conv2d_29_b_path =  dir_prefix + std::string("conv2d_29_b.bin"); 
-  void* conv2d_29_b =  readTrainedWeights(conv2d_29_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_29_gamma_path =  dir_prefix + std::string("batch_normalization_29_gamma.bin"); 
-  void* batch_normalization_29_gamma =  readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_29_beta_path =  dir_prefix + std::string("batch_normalization_29_beta.bin"); 
-  void* batch_normalization_29_beta =  readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_29_mean_path =  dir_prefix + std::string("batch_normalization_29_mean.bin"); 
-  void* batch_normalization_29_mean =  readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_29_variance_path =  dir_prefix + std::string("batch_normalization_29_variance.bin"); 
-  void* batch_normalization_29_variance =  readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_30_w_path =  dir_prefix + std::string("conv2d_30_w.bin"); 
-  void* conv2d_30_w =  readTrainedWeights(conv2d_30_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_30_b_path =  dir_prefix + std::string("conv2d_30_b.bin"); 
-  void* conv2d_30_b =  readTrainedWeights(conv2d_30_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_30_gamma_path =  dir_prefix + std::string("batch_normalization_30_gamma.bin"); 
-  void* batch_normalization_30_gamma =  readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_30_beta_path =  dir_prefix + std::string("batch_normalization_30_beta.bin"); 
-  void* batch_normalization_30_beta =  readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_30_mean_path =  dir_prefix + std::string("batch_normalization_30_mean.bin"); 
-  void* batch_normalization_30_mean =  readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_30_variance_path =  dir_prefix + std::string("batch_normalization_30_variance.bin"); 
-  void* batch_normalization_30_variance =  readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_31_w_path =  dir_prefix + std::string("conv2d_31_w.bin"); 
-  void* conv2d_31_w =  readTrainedWeights(conv2d_31_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_31_b_path =  dir_prefix + std::string("conv2d_31_b.bin"); 
-  void* conv2d_31_b =  readTrainedWeights(conv2d_31_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_31_gamma_path =  dir_prefix + std::string("batch_normalization_31_gamma.bin"); 
-  void* batch_normalization_31_gamma =  readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_31_beta_path =  dir_prefix + std::string("batch_normalization_31_beta.bin"); 
-  void* batch_normalization_31_beta =  readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_31_mean_path =  dir_prefix + std::string("batch_normalization_31_mean.bin"); 
-  void* batch_normalization_31_mean =  readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_31_variance_path =  dir_prefix + std::string("batch_normalization_31_variance.bin"); 
-  void* batch_normalization_31_variance =  readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_32_w_path =  dir_prefix + std::string("conv2d_32_w.bin"); 
-  void* conv2d_32_w =  readTrainedWeights(conv2d_32_w_path.c_str(), 0,256,1024,1,1); 
-  std::string conv2d_32_b_path =  dir_prefix + std::string("conv2d_32_b.bin"); 
-  void* conv2d_32_b =  readTrainedWeights(conv2d_32_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_32_gamma_path =  dir_prefix + std::string("batch_normalization_32_gamma.bin"); 
-  void* batch_normalization_32_gamma =  readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_32_beta_path =  dir_prefix + std::string("batch_normalization_32_beta.bin"); 
-  void* batch_normalization_32_beta =  readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_32_mean_path =  dir_prefix + std::string("batch_normalization_32_mean.bin"); 
-  void* batch_normalization_32_mean =  readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_32_variance_path =  dir_prefix + std::string("batch_normalization_32_variance.bin"); 
-  void* batch_normalization_32_variance =  readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_33_w_path =  dir_prefix + std::string("conv2d_33_w.bin"); 
-  void* conv2d_33_w =  readTrainedWeights(conv2d_33_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_33_b_path =  dir_prefix + std::string("conv2d_33_b.bin"); 
-  void* conv2d_33_b =  readTrainedWeights(conv2d_33_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_33_gamma_path =  dir_prefix + std::string("batch_normalization_33_gamma.bin"); 
-  void* batch_normalization_33_gamma =  readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_33_beta_path =  dir_prefix + std::string("batch_normalization_33_beta.bin"); 
-  void* batch_normalization_33_beta =  readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_33_mean_path =  dir_prefix + std::string("batch_normalization_33_mean.bin"); 
-  void* batch_normalization_33_mean =  readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_33_variance_path =  dir_prefix + std::string("batch_normalization_33_variance.bin"); 
-  void* batch_normalization_33_variance =  readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_34_w_path =  dir_prefix + std::string("conv2d_34_w.bin"); 
-  void* conv2d_34_w =  readTrainedWeights(conv2d_34_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_34_b_path =  dir_prefix + std::string("conv2d_34_b.bin"); 
-  void* conv2d_34_b =  readTrainedWeights(conv2d_34_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_34_gamma_path =  dir_prefix + std::string("batch_normalization_34_gamma.bin"); 
-  void* batch_normalization_34_gamma =  readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_34_beta_path =  dir_prefix + std::string("batch_normalization_34_beta.bin"); 
-  void* batch_normalization_34_beta =  readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_34_mean_path =  dir_prefix + std::string("batch_normalization_34_mean.bin"); 
-  void* batch_normalization_34_mean =  readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_34_variance_path =  dir_prefix + std::string("batch_normalization_34_variance.bin"); 
-  void* batch_normalization_34_variance =  readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_35_w_path =  dir_prefix + std::string("conv2d_35_w.bin"); 
-  void* conv2d_35_w =  readTrainedWeights(conv2d_35_w_path.c_str(), 0,256,1024,1,1); 
-  std::string conv2d_35_b_path =  dir_prefix + std::string("conv2d_35_b.bin"); 
-  void* conv2d_35_b =  readTrainedWeights(conv2d_35_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_35_gamma_path =  dir_prefix + std::string("batch_normalization_35_gamma.bin"); 
-  void* batch_normalization_35_gamma =  readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_35_beta_path =  dir_prefix + std::string("batch_normalization_35_beta.bin"); 
-  void* batch_normalization_35_beta =  readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_35_mean_path =  dir_prefix + std::string("batch_normalization_35_mean.bin"); 
-  void* batch_normalization_35_mean =  readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_35_variance_path =  dir_prefix + std::string("batch_normalization_35_variance.bin"); 
-  void* batch_normalization_35_variance =  readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_36_w_path =  dir_prefix + std::string("conv2d_36_w.bin"); 
-  void* conv2d_36_w =  readTrainedWeights(conv2d_36_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_36_b_path =  dir_prefix + std::string("conv2d_36_b.bin"); 
-  void* conv2d_36_b =  readTrainedWeights(conv2d_36_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_36_gamma_path =  dir_prefix + std::string("batch_normalization_36_gamma.bin"); 
-  void* batch_normalization_36_gamma =  readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_36_beta_path =  dir_prefix + std::string("batch_normalization_36_beta.bin"); 
-  void* batch_normalization_36_beta =  readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_36_mean_path =  dir_prefix + std::string("batch_normalization_36_mean.bin"); 
-  void* batch_normalization_36_mean =  readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_36_variance_path =  dir_prefix + std::string("batch_normalization_36_variance.bin"); 
-  void* batch_normalization_36_variance =  readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_37_w_path =  dir_prefix + std::string("conv2d_37_w.bin"); 
-  void* conv2d_37_w =  readTrainedWeights(conv2d_37_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_37_b_path =  dir_prefix + std::string("conv2d_37_b.bin"); 
-  void* conv2d_37_b =  readTrainedWeights(conv2d_37_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_37_gamma_path =  dir_prefix + std::string("batch_normalization_37_gamma.bin"); 
-  void* batch_normalization_37_gamma =  readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_37_beta_path =  dir_prefix + std::string("batch_normalization_37_beta.bin"); 
-  void* batch_normalization_37_beta =  readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_37_mean_path =  dir_prefix + std::string("batch_normalization_37_mean.bin"); 
-  void* batch_normalization_37_mean =  readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_37_variance_path =  dir_prefix + std::string("batch_normalization_37_variance.bin"); 
-  void* batch_normalization_37_variance =  readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_38_w_path =  dir_prefix + std::string("conv2d_38_w.bin"); 
-  void* conv2d_38_w =  readTrainedWeights(conv2d_38_w_path.c_str(), 0,256,1024,1,1); 
-  std::string conv2d_38_b_path =  dir_prefix + std::string("conv2d_38_b.bin"); 
-  void* conv2d_38_b =  readTrainedWeights(conv2d_38_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_38_gamma_path =  dir_prefix + std::string("batch_normalization_38_gamma.bin"); 
-  void* batch_normalization_38_gamma =  readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_38_beta_path =  dir_prefix + std::string("batch_normalization_38_beta.bin"); 
-  void* batch_normalization_38_beta =  readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_38_mean_path =  dir_prefix + std::string("batch_normalization_38_mean.bin"); 
-  void* batch_normalization_38_mean =  readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_38_variance_path =  dir_prefix + std::string("batch_normalization_38_variance.bin"); 
-  void* batch_normalization_38_variance =  readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_39_w_path =  dir_prefix + std::string("conv2d_39_w.bin"); 
-  void* conv2d_39_w =  readTrainedWeights(conv2d_39_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_39_b_path =  dir_prefix + std::string("conv2d_39_b.bin"); 
-  void* conv2d_39_b =  readTrainedWeights(conv2d_39_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_39_gamma_path =  dir_prefix + std::string("batch_normalization_39_gamma.bin"); 
-  void* batch_normalization_39_gamma =  readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_39_beta_path =  dir_prefix + std::string("batch_normalization_39_beta.bin"); 
-  void* batch_normalization_39_beta =  readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_39_mean_path =  dir_prefix + std::string("batch_normalization_39_mean.bin"); 
-  void* batch_normalization_39_mean =  readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_39_variance_path =  dir_prefix + std::string("batch_normalization_39_variance.bin"); 
-  void* batch_normalization_39_variance =  readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_40_w_path =  dir_prefix + std::string("conv2d_40_w.bin"); 
-  void* conv2d_40_w =  readTrainedWeights(conv2d_40_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_40_b_path =  dir_prefix + std::string("conv2d_40_b.bin"); 
-  void* conv2d_40_b =  readTrainedWeights(conv2d_40_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_40_gamma_path =  dir_prefix + std::string("batch_normalization_40_gamma.bin"); 
-  void* batch_normalization_40_gamma =  readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_40_beta_path =  dir_prefix + std::string("batch_normalization_40_beta.bin"); 
-  void* batch_normalization_40_beta =  readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_40_mean_path =  dir_prefix + std::string("batch_normalization_40_mean.bin"); 
-  void* batch_normalization_40_mean =  readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_40_variance_path =  dir_prefix + std::string("batch_normalization_40_variance.bin"); 
-  void* batch_normalization_40_variance =  readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_41_w_path =  dir_prefix + std::string("conv2d_41_w.bin"); 
-  void* conv2d_41_w =  readTrainedWeights(conv2d_41_w_path.c_str(), 0,256,1024,1,1); 
-  std::string conv2d_41_b_path =  dir_prefix + std::string("conv2d_41_b.bin"); 
-  void* conv2d_41_b =  readTrainedWeights(conv2d_41_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_41_gamma_path =  dir_prefix + std::string("batch_normalization_41_gamma.bin"); 
-  void* batch_normalization_41_gamma =  readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_41_beta_path =  dir_prefix + std::string("batch_normalization_41_beta.bin"); 
-  void* batch_normalization_41_beta =  readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_41_mean_path =  dir_prefix + std::string("batch_normalization_41_mean.bin"); 
-  void* batch_normalization_41_mean =  readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_41_variance_path =  dir_prefix + std::string("batch_normalization_41_variance.bin"); 
-  void* batch_normalization_41_variance =  readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_42_w_path =  dir_prefix + std::string("conv2d_42_w.bin"); 
-  void* conv2d_42_w =  readTrainedWeights(conv2d_42_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_42_b_path =  dir_prefix + std::string("conv2d_42_b.bin"); 
-  void* conv2d_42_b =  readTrainedWeights(conv2d_42_b_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_42_gamma_path =  dir_prefix + std::string("batch_normalization_42_gamma.bin"); 
-  void* batch_normalization_42_gamma =  readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_42_beta_path =  dir_prefix + std::string("batch_normalization_42_beta.bin"); 
-  void* batch_normalization_42_beta =  readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_42_mean_path =  dir_prefix + std::string("batch_normalization_42_mean.bin"); 
-  void* batch_normalization_42_mean =  readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_42_variance_path =  dir_prefix + std::string("batch_normalization_42_variance.bin"); 
-  void* batch_normalization_42_variance =  readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_43_w_path =  dir_prefix + std::string("conv2d_43_w.bin"); 
-  void* conv2d_43_w =  readTrainedWeights(conv2d_43_w_path.c_str(), 0,1024,256,1,1); 
-  std::string conv2d_43_b_path =  dir_prefix + std::string("conv2d_43_b.bin"); 
-  void* conv2d_43_b =  readTrainedWeights(conv2d_43_b_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_43_gamma_path =  dir_prefix + std::string("batch_normalization_43_gamma.bin"); 
-  void* batch_normalization_43_gamma =  readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_43_beta_path =  dir_prefix + std::string("batch_normalization_43_beta.bin"); 
-  void* batch_normalization_43_beta =  readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_43_mean_path =  dir_prefix + std::string("batch_normalization_43_mean.bin"); 
-  void* batch_normalization_43_mean =  readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_43_variance_path =  dir_prefix + std::string("batch_normalization_43_variance.bin"); 
-  void* batch_normalization_43_variance =  readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_44_w_path =  dir_prefix + std::string("conv2d_44_w.bin"); 
-  void* conv2d_44_w =  readTrainedWeights(conv2d_44_w_path.c_str(), 0,512,1024,1,1); 
-  std::string conv2d_44_b_path =  dir_prefix + std::string("conv2d_44_b.bin"); 
-  void* conv2d_44_b =  readTrainedWeights(conv2d_44_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_44_gamma_path =  dir_prefix + std::string("batch_normalization_44_gamma.bin"); 
-  void* batch_normalization_44_gamma =  readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_44_beta_path =  dir_prefix + std::string("batch_normalization_44_beta.bin"); 
-  void* batch_normalization_44_beta =  readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_44_mean_path =  dir_prefix + std::string("batch_normalization_44_mean.bin"); 
-  void* batch_normalization_44_mean =  readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_44_variance_path =  dir_prefix + std::string("batch_normalization_44_variance.bin"); 
-  void* batch_normalization_44_variance =  readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_45_w_path =  dir_prefix + std::string("conv2d_45_w.bin"); 
-  void* conv2d_45_w =  readTrainedWeights(conv2d_45_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_45_b_path =  dir_prefix + std::string("conv2d_45_b.bin"); 
-  void* conv2d_45_b =  readTrainedWeights(conv2d_45_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_45_gamma_path =  dir_prefix + std::string("batch_normalization_45_gamma.bin"); 
-  void* batch_normalization_45_gamma =  readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_45_beta_path =  dir_prefix + std::string("batch_normalization_45_beta.bin"); 
-  void* batch_normalization_45_beta =  readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_45_mean_path =  dir_prefix + std::string("batch_normalization_45_mean.bin"); 
-  void* batch_normalization_45_mean =  readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_45_variance_path =  dir_prefix + std::string("batch_normalization_45_variance.bin"); 
-  void* batch_normalization_45_variance =  readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_46_w_path =  dir_prefix + std::string("conv2d_46_w.bin"); 
-  void* conv2d_46_w =  readTrainedWeights(conv2d_46_w_path.c_str(), 0,2048,512,1,1); 
-  std::string conv2d_46_b_path =  dir_prefix + std::string("conv2d_46_b.bin"); 
-  void* conv2d_46_b =  readTrainedWeights(conv2d_46_b_path.c_str(), 0,1,2048,1,1); 
-  std::string conv2d_47_w_path =  dir_prefix + std::string("conv2d_47_w.bin"); 
-  void* conv2d_47_w =  readTrainedWeights(conv2d_47_w_path.c_str(), 0,2048,1024,1,1); 
-  std::string conv2d_47_b_path =  dir_prefix + std::string("conv2d_47_b.bin"); 
-  void* conv2d_47_b =  readTrainedWeights(conv2d_47_b_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_46_gamma_path =  dir_prefix + std::string("batch_normalization_46_gamma.bin"); 
-  void* batch_normalization_46_gamma =  readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_46_beta_path =  dir_prefix + std::string("batch_normalization_46_beta.bin"); 
-  void* batch_normalization_46_beta =  readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_46_mean_path =  dir_prefix + std::string("batch_normalization_46_mean.bin"); 
-  void* batch_normalization_46_mean =  readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_46_variance_path =  dir_prefix + std::string("batch_normalization_46_variance.bin"); 
-  void* batch_normalization_46_variance =  readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_47_gamma_path =  dir_prefix + std::string("batch_normalization_47_gamma.bin"); 
-  void* batch_normalization_47_gamma =  readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_47_beta_path =  dir_prefix + std::string("batch_normalization_47_beta.bin"); 
-  void* batch_normalization_47_beta =  readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_47_mean_path =  dir_prefix + std::string("batch_normalization_47_mean.bin"); 
-  void* batch_normalization_47_mean =  readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_47_variance_path =  dir_prefix + std::string("batch_normalization_47_variance.bin"); 
-  void* batch_normalization_47_variance =  readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,2048,1,1); 
-  std::string conv2d_48_w_path =  dir_prefix + std::string("conv2d_48_w.bin"); 
-  void* conv2d_48_w =  readTrainedWeights(conv2d_48_w_path.c_str(), 0,512,2048,1,1); 
-  std::string conv2d_48_b_path =  dir_prefix + std::string("conv2d_48_b.bin"); 
-  void* conv2d_48_b =  readTrainedWeights(conv2d_48_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_48_gamma_path =  dir_prefix + std::string("batch_normalization_48_gamma.bin"); 
-  void* batch_normalization_48_gamma =  readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_48_beta_path =  dir_prefix + std::string("batch_normalization_48_beta.bin"); 
-  void* batch_normalization_48_beta =  readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_48_mean_path =  dir_prefix + std::string("batch_normalization_48_mean.bin"); 
-  void* batch_normalization_48_mean =  readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_48_variance_path =  dir_prefix + std::string("batch_normalization_48_variance.bin"); 
-  void* batch_normalization_48_variance =  readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_49_w_path =  dir_prefix + std::string("conv2d_49_w.bin"); 
-  void* conv2d_49_w =  readTrainedWeights(conv2d_49_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_49_b_path =  dir_prefix + std::string("conv2d_49_b.bin"); 
-  void* conv2d_49_b =  readTrainedWeights(conv2d_49_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_49_gamma_path =  dir_prefix + std::string("batch_normalization_49_gamma.bin"); 
-  void* batch_normalization_49_gamma =  readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_49_beta_path =  dir_prefix + std::string("batch_normalization_49_beta.bin"); 
-  void* batch_normalization_49_beta =  readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_49_mean_path =  dir_prefix + std::string("batch_normalization_49_mean.bin"); 
-  void* batch_normalization_49_mean =  readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_49_variance_path =  dir_prefix + std::string("batch_normalization_49_variance.bin"); 
-  void* batch_normalization_49_variance =  readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_50_w_path =  dir_prefix + std::string("conv2d_50_w.bin"); 
-  void* conv2d_50_w =  readTrainedWeights(conv2d_50_w_path.c_str(), 0,2048,512,1,1); 
-  std::string conv2d_50_b_path =  dir_prefix + std::string("conv2d_50_b.bin"); 
-  void* conv2d_50_b =  readTrainedWeights(conv2d_50_b_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_50_gamma_path =  dir_prefix + std::string("batch_normalization_50_gamma.bin"); 
-  void* batch_normalization_50_gamma =  readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_50_beta_path =  dir_prefix + std::string("batch_normalization_50_beta.bin"); 
-  void* batch_normalization_50_beta =  readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_50_mean_path =  dir_prefix + std::string("batch_normalization_50_mean.bin"); 
-  void* batch_normalization_50_mean =  readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_50_variance_path =  dir_prefix + std::string("batch_normalization_50_variance.bin"); 
-  void* batch_normalization_50_variance =  readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,2048,1,1); 
-  std::string conv2d_51_w_path =  dir_prefix + std::string("conv2d_51_w.bin"); 
-  void* conv2d_51_w =  readTrainedWeights(conv2d_51_w_path.c_str(), 0,512,2048,1,1); 
-  std::string conv2d_51_b_path =  dir_prefix + std::string("conv2d_51_b.bin"); 
-  void* conv2d_51_b =  readTrainedWeights(conv2d_51_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_51_gamma_path =  dir_prefix + std::string("batch_normalization_51_gamma.bin"); 
-  void* batch_normalization_51_gamma =  readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_51_beta_path =  dir_prefix + std::string("batch_normalization_51_beta.bin"); 
-  void* batch_normalization_51_beta =  readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_51_mean_path =  dir_prefix + std::string("batch_normalization_51_mean.bin"); 
-  void* batch_normalization_51_mean =  readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_51_variance_path =  dir_prefix + std::string("batch_normalization_51_variance.bin"); 
-  void* batch_normalization_51_variance =  readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_52_w_path =  dir_prefix + std::string("conv2d_52_w.bin"); 
-  void* conv2d_52_w =  readTrainedWeights(conv2d_52_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_52_b_path =  dir_prefix + std::string("conv2d_52_b.bin"); 
-  void* conv2d_52_b =  readTrainedWeights(conv2d_52_b_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_52_gamma_path =  dir_prefix + std::string("batch_normalization_52_gamma.bin"); 
-  void* batch_normalization_52_gamma =  readTrainedWeights(batch_normalization_52_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_52_beta_path =  dir_prefix + std::string("batch_normalization_52_beta.bin"); 
-  void* batch_normalization_52_beta =  readTrainedWeights(batch_normalization_52_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_52_mean_path =  dir_prefix + std::string("batch_normalization_52_mean.bin"); 
-  void* batch_normalization_52_mean =  readTrainedWeights(batch_normalization_52_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_52_variance_path =  dir_prefix + std::string("batch_normalization_52_variance.bin"); 
-  void* batch_normalization_52_variance =  readTrainedWeights(batch_normalization_52_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_53_w_path =  dir_prefix + std::string("conv2d_53_w.bin"); 
-  void* conv2d_53_w =  readTrainedWeights(conv2d_53_w_path.c_str(), 0,2048,512,1,1); 
-  std::string conv2d_53_b_path =  dir_prefix + std::string("conv2d_53_b.bin"); 
-  void* conv2d_53_b =  readTrainedWeights(conv2d_53_b_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_53_gamma_path =  dir_prefix + std::string("batch_normalization_53_gamma.bin"); 
-  void* batch_normalization_53_gamma =  readTrainedWeights(batch_normalization_53_gamma_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_53_beta_path =  dir_prefix + std::string("batch_normalization_53_beta.bin"); 
-  void* batch_normalization_53_beta =  readTrainedWeights(batch_normalization_53_beta_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_53_mean_path =  dir_prefix + std::string("batch_normalization_53_mean.bin"); 
-  void* batch_normalization_53_mean =  readTrainedWeights(batch_normalization_53_mean_path.c_str(), 0,1,2048,1,1); 
-  std::string batch_normalization_53_variance_path =  dir_prefix + std::string("batch_normalization_53_variance.bin"); 
-  void* batch_normalization_53_variance =  readTrainedWeights(batch_normalization_53_variance_path.c_str(), 0,1,2048,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,1000); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1000,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 2000; 
-  int batch_size = 100; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); 
-
-    void* var_2 = tensorConvolution(input, conv2d_1_w, 3, 3, 2, 2, 1, 1); 
-    void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-    void* var_4 = tensorRelu(var_3);
-    // NOTE: Issue is that this pooling has window 2*2 
-    void* var_5 = tensorPooling(var_4,0,3,3,0,0,2,2); 
-    void* var_6 = tensorBatchNorm(var_5, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorAdd(var_7, conv2d_2_b); 
-    void* var_9 = tensorBatchNorm(var_8, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_11 = tensorConvolution(var_10, conv2d_3_w, 1, 1, 1, 1, 1, 1); 
-    void* var_12 = tensorAdd(var_11, conv2d_3_b); 
-    void* var_13 = tensorBatchNorm(var_12, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorConvolution(var_14, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-    void* var_17 = tensorBatchNorm(var_16, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_18 = tensorConvolution(var_6, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_19 = tensorAdd(var_18, conv2d_5_b); 
-    void* var_20 = tensorBatchNorm(var_19, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_21 = tensorAdd(var_17, var_20); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_23 = tensorConvolution(var_22, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_24 = tensorAdd(var_23, conv2d_6_b); 
-    void* var_25 = tensorBatchNorm(var_24, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_27 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 1); 
-    void* var_28 = tensorAdd(var_27, conv2d_7_b); 
-    void* var_29 = tensorBatchNorm(var_28, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_31 = tensorConvolution(var_30, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-    void* var_32 = tensorAdd(var_31, conv2d_8_b); 
-    void* var_33 = tensorBatchNorm(var_32, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_34 = tensorAdd(var_33, var_22); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_36 = tensorConvolution(var_35, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-    void* var_37 = tensorAdd(var_36, conv2d_9_b); 
-    void* var_38 = tensorBatchNorm(var_37, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_39 = tensorRelu(var_38); 
-    void* var_40 = tensorConvolution(var_39, conv2d_10_w, 1, 1, 1, 1, 1, 1); 
-    void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-    void* var_42 = tensorBatchNorm(var_41, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_44 = tensorConvolution(var_43, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorAdd(var_44, conv2d_11_b); 
-    void* var_46 = tensorBatchNorm(var_45, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_47 = tensorAdd(var_46, var_35); 
-    void* var_48 = tensorRelu(var_47); 
-    void* var_49 = tensorConvolution(var_48, conv2d_12_w, 0, 0, 2, 2, 1, 1); 
-    void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-    void* var_51 = tensorBatchNorm(var_50, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_52 = tensorRelu(var_51); 
-    void* var_53 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 1); 
-    void* var_54 = tensorAdd(var_53, conv2d_13_b); 
-    void* var_55 = tensorBatchNorm(var_54, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_57 = tensorConvolution(var_56, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-    void* var_58 = tensorAdd(var_57, conv2d_14_b); 
-    void* var_59 = tensorBatchNorm(var_58, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-    void* var_60 = tensorConvolution(var_48, conv2d_15_w, 0, 0, 2, 2, 1, 1); 
-    void* var_61 = tensorAdd(var_60, conv2d_15_b); 
-    void* var_62 = tensorBatchNorm(var_61, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-    void* var_63 = tensorAdd(var_59, var_62); 
-    void* var_64 = tensorRelu(var_63); 
-    void* var_65 = tensorConvolution(var_64, conv2d_16_w, 0, 0, 1, 1, 1, 1); 
-    void* var_66 = tensorAdd(var_65, conv2d_16_b); 
-    void* var_67 = tensorBatchNorm(var_66, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-    void* var_68 = tensorRelu(var_67); 
-    void* var_69 = tensorConvolution(var_68, conv2d_17_w, 1, 1, 1, 1, 1, 1); 
-    void* var_70 = tensorAdd(var_69, conv2d_17_b); 
-    void* var_71 = tensorBatchNorm(var_70, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-    void* var_72 = tensorRelu(var_71); 
-    void* var_73 = tensorConvolution(var_72, conv2d_18_w, 0, 0, 1, 1, 1, 1); 
-    void* var_74 = tensorAdd(var_73, conv2d_18_b); 
-    void* var_75 = tensorBatchNorm(var_74, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-    void* var_76 = tensorAdd(var_75, var_64); 
-    void* var_77 = tensorRelu(var_76); 
-    void* var_78 = tensorConvolution(var_77, conv2d_19_w, 0, 0, 1, 1, 1, 1); 
-    void* var_79 = tensorAdd(var_78, conv2d_19_b); 
-    void* var_80 = tensorBatchNorm(var_79, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-    void* var_81 = tensorRelu(var_80); 
-    void* var_82 = tensorConvolution(var_81, conv2d_20_w, 1, 1, 1, 1, 1, 1); 
-    void* var_83 = tensorAdd(var_82, conv2d_20_b); 
-    void* var_84 = tensorBatchNorm(var_83, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-    void* var_85 = tensorRelu(var_84); 
-    void* var_86 = tensorConvolution(var_85, conv2d_21_w, 0, 0, 1, 1, 1, 1); 
-    void* var_87 = tensorAdd(var_86, conv2d_21_b); 
-    void* var_88 = tensorBatchNorm(var_87, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-    void* var_89 = tensorAdd(var_88, var_77); 
-    void* var_90 = tensorRelu(var_89); 
-    void* var_91 = tensorConvolution(var_90, conv2d_22_w, 0, 0, 1, 1, 1, 1); 
-    void* var_92 = tensorAdd(var_91, conv2d_22_b); 
-    void* var_93 = tensorBatchNorm(var_92, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-    void* var_94 = tensorRelu(var_93); 
-    void* var_95 = tensorConvolution(var_94, conv2d_23_w, 1, 1, 1, 1, 1, 1); 
-    void* var_96 = tensorAdd(var_95, conv2d_23_b); 
-    void* var_97 = tensorBatchNorm(var_96, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-    void* var_98 = tensorRelu(var_97); 
-    void* var_99 = tensorConvolution(var_98, conv2d_24_w, 0, 0, 1, 1, 1, 1); 
-    void* var_100 = tensorAdd(var_99, conv2d_24_b); 
-    void* var_101 = tensorBatchNorm(var_100, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-    void* var_102 = tensorAdd(var_101, var_90); 
-    void* var_103 = tensorRelu(var_102); 
-    void* var_104 = tensorConvolution(var_103, conv2d_25_w, 0, 0, 2, 2, 1, 1); 
-    void* var_105 = tensorAdd(var_104, conv2d_25_b); 
-    void* var_106 = tensorBatchNorm(var_105, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-    void* var_107 = tensorRelu(var_106); 
-    void* var_108 = tensorConvolution(var_107, conv2d_26_w, 1, 1, 1, 1, 1, 1); 
-    void* var_109 = tensorAdd(var_108, conv2d_26_b); 
-    void* var_110 = tensorBatchNorm(var_109, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-    void* var_111 = tensorRelu(var_110); 
-    void* var_112 = tensorConvolution(var_111, conv2d_27_w, 0, 0, 1, 1, 1, 1); 
-    void* var_113 = tensorAdd(var_112, conv2d_27_b); 
-    void* var_114 = tensorBatchNorm(var_113, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-    void* var_115 = tensorConvolution(var_103, conv2d_28_w, 0, 0, 2, 2, 1, 1); 
-    void* var_116 = tensorAdd(var_115, conv2d_28_b); 
-    void* var_117 = tensorBatchNorm(var_116, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); 
-    void* var_118 = tensorAdd(var_114, var_117); 
-    void* var_119 = tensorRelu(var_118); 
-    void* var_120 = tensorConvolution(var_119, conv2d_29_w, 0, 0, 1, 1, 1, 1); 
-    void* var_121 = tensorAdd(var_120, conv2d_29_b); 
-    void* var_122 = tensorBatchNorm(var_121, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); 
-    void* var_123 = tensorRelu(var_122); 
-    void* var_124 = tensorConvolution(var_123, conv2d_30_w, 1, 1, 1, 1, 1, 1); 
-    void* var_125 = tensorAdd(var_124, conv2d_30_b); 
-    void* var_126 = tensorBatchNorm(var_125, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); 
-    void* var_127 = tensorRelu(var_126); 
-    void* var_128 = tensorConvolution(var_127, conv2d_31_w, 0, 0, 1, 1, 1, 1); 
-    void* var_129 = tensorAdd(var_128, conv2d_31_b); 
-    void* var_130 = tensorBatchNorm(var_129, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); 
-    void* var_131 = tensorAdd(var_130, var_119); 
-    void* var_132 = tensorRelu(var_131); 
-    void* var_133 = tensorConvolution(var_132, conv2d_32_w, 0, 0, 1, 1, 1, 1); 
-    void* var_134 = tensorAdd(var_133, conv2d_32_b); 
-    void* var_135 = tensorBatchNorm(var_134, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 0.001); 
-    void* var_136 = tensorRelu(var_135); 
-    void* var_137 = tensorConvolution(var_136, conv2d_33_w, 1, 1, 1, 1, 1, 1); 
-    void* var_138 = tensorAdd(var_137, conv2d_33_b); 
-    void* var_139 = tensorBatchNorm(var_138, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); 
-    void* var_140 = tensorRelu(var_139); 
-    void* var_141 = tensorConvolution(var_140, conv2d_34_w, 0, 0, 1, 1, 1, 1); 
-    void* var_142 = tensorAdd(var_141, conv2d_34_b); 
-    void* var_143 = tensorBatchNorm(var_142, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); 
-    void* var_144 = tensorAdd(var_143, var_132); 
-    void* var_145 = tensorRelu(var_144); 
-    void* var_146 = tensorConvolution(var_145, conv2d_35_w, 0, 0, 1, 1, 1, 1); 
-    void* var_147 = tensorAdd(var_146, conv2d_35_b); 
-    void* var_148 = tensorBatchNorm(var_147, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); 
-    void* var_149 = tensorRelu(var_148); 
-    void* var_150 = tensorConvolution(var_149, conv2d_36_w, 1, 1, 1, 1, 1, 1); 
-    void* var_151 = tensorAdd(var_150, conv2d_36_b); 
-    void* var_152 = tensorBatchNorm(var_151, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); 
-    void* var_153 = tensorRelu(var_152); 
-    void* var_154 = tensorConvolution(var_153, conv2d_37_w, 0, 0, 1, 1, 1, 1); 
-    void* var_155 = tensorAdd(var_154, conv2d_37_b); 
-    void* var_156 = tensorBatchNorm(var_155, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 0.001); 
-    void* var_157 = tensorAdd(var_156, var_145); 
-    void* var_158 = tensorRelu(var_157); 
-    void* var_159 = tensorConvolution(var_158, conv2d_38_w, 0, 0, 1, 1, 1, 1); 
-    void* var_160 = tensorAdd(var_159, conv2d_38_b); 
-    void* var_161 = tensorBatchNorm(var_160, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); 
-    void* var_162 = tensorRelu(var_161); 
-    void* var_163 = tensorConvolution(var_162, conv2d_39_w, 1, 1, 1, 1, 1, 1); 
-    void* var_164 = tensorAdd(var_163, conv2d_39_b); 
-    void* var_165 = tensorBatchNorm(var_164, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); 
-    void* var_166 = tensorRelu(var_165); 
-    void* var_167 = tensorConvolution(var_166, conv2d_40_w, 0, 0, 1, 1, 1, 1); 
-    void* var_168 = tensorAdd(var_167, conv2d_40_b); 
-    void* var_169 = tensorBatchNorm(var_168, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); 
-    void* var_170 = tensorAdd(var_169, var_158); 
-    void* var_171 = tensorRelu(var_170); 
-    void* var_172 = tensorConvolution(var_171, conv2d_41_w, 0, 0, 1, 1, 1, 1); 
-    void* var_173 = tensorAdd(var_172, conv2d_41_b); 
-    void* var_174 = tensorBatchNorm(var_173, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); 
-    void* var_175 = tensorRelu(var_174); 
-    void* var_176 = tensorConvolution(var_175, conv2d_42_w, 1, 1, 1, 1, 1, 1); 
-    void* var_177 = tensorAdd(var_176, conv2d_42_b); 
-    void* var_178 = tensorBatchNorm(var_177, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); 
-    void* var_179 = tensorRelu(var_178); 
-    void* var_180 = tensorConvolution(var_179, conv2d_43_w, 0, 0, 1, 1, 1, 1); 
-    void* var_181 = tensorAdd(var_180, conv2d_43_b); 
-    void* var_182 = tensorBatchNorm(var_181, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); 
-    void* var_183 = tensorAdd(var_182, var_171); 
-    void* var_184 = tensorRelu(var_183); 
-    void* var_185 = tensorConvolution(var_184, conv2d_44_w, 0, 0, 2, 2, 1, 1); 
-    void* var_186 = tensorAdd(var_185, conv2d_44_b); 
-    void* var_187 = tensorBatchNorm(var_186, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); 
-    void* var_188 = tensorRelu(var_187); 
-    void* var_189 = tensorConvolution(var_188, conv2d_45_w, 1, 1, 1, 1, 1, 1); 
-    void* var_190 = tensorAdd(var_189, conv2d_45_b); 
-    void* var_191 = tensorBatchNorm(var_190, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, batch_normalization_45_variance, 0.001); 
-    void* var_192 = tensorRelu(var_191); 
-    void* var_193 = tensorConvolution(var_192, conv2d_46_w, 0, 0, 1, 1, 1, 1); 
-    void* var_194 = tensorAdd(var_193, conv2d_46_b); 
-    void* var_195 = tensorBatchNorm(var_194, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); 
-    void* var_196 = tensorConvolution(var_184, conv2d_47_w, 0, 0, 2, 2, 1, 1); 
-    void* var_197 = tensorAdd(var_196, conv2d_47_b); 
-    void* var_198 = tensorBatchNorm(var_197, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); 
-    void* var_199 = tensorAdd(var_195, var_198); 
-    void* var_200 = tensorRelu(var_199); 
-    void* var_201 = tensorConvolution(var_200, conv2d_48_w, 0, 0, 1, 1, 1, 1); 
-    void* var_202 = tensorAdd(var_201, conv2d_48_b); 
-    void* var_203 = tensorBatchNorm(var_202, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); 
-    void* var_204 = tensorRelu(var_203); 
-    void* var_205 = tensorConvolution(var_204, conv2d_49_w, 1, 1, 1, 1, 1, 1); 
-    void* var_206 = tensorAdd(var_205, conv2d_49_b); 
-    void* var_207 = tensorBatchNorm(var_206, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); 
-    void* var_208 = tensorRelu(var_207); 
-    void* var_209 = tensorConvolution(var_208, conv2d_50_w, 0, 0, 1, 1, 1, 1); 
-    void* var_210 = tensorAdd(var_209, conv2d_50_b); 
-    void* var_211 = tensorBatchNorm(var_210, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); 
-    void* var_212 = tensorAdd(var_211, var_200); 
-    void* var_213 = tensorRelu(var_212); 
-    void* var_214 = tensorConvolution(var_213, conv2d_51_w, 0, 0, 1, 1, 1, 1); 
-    void* var_215 = tensorAdd(var_214, conv2d_51_b); 
-    void* var_216 = tensorBatchNorm(var_215, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); 
-    void* var_217 = tensorRelu(var_216); 
-    void* var_218 = tensorConvolution(var_217, conv2d_52_w, 1, 1, 1, 1, 1, 1); 
-    void* var_219 = tensorAdd(var_218, conv2d_52_b); 
-    void* var_220 = tensorBatchNorm(var_219, batch_normalization_52_gamma, batch_normalization_52_beta, batch_normalization_52_mean, batch_normalization_52_variance, 0.001); 
-    void* var_221 = tensorRelu(var_220); 
-    void* var_222 = tensorConvolution(var_221, conv2d_53_w, 0, 0, 1, 1, 1, 1); 
-    void* var_223 = tensorAdd(var_222, conv2d_53_b); 
-    void* var_224 = tensorBatchNorm(var_223, batch_normalization_53_gamma, batch_normalization_53_beta, batch_normalization_53_mean, batch_normalization_53_variance, 0.001); 
-    void* var_225 = tensorAdd(var_224, var_213); 
-    void* var_226 = tensorRelu(var_225); 
-    void* var_227 = tensorPooling(var_226,1,7,7,0,0,7,7); 
-    void* var_229 = tensorGemmGPU(var_227, dense_1_w); 
-    void* var_230 = tensorAdd(var_229, dense_1_b); 
-    void* var_231 = tensorSoftmax(var_230); 
-
-    uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy3(labels, var_231); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc
deleted file mode 100644
index b694f007b2e1c4cbe71bbe53c1065888542b23f1..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Outline - Motion Blur - Emboss ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 2000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/caltech101_255_float32.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-G-O-M-E-FP32-clipped-2000.bin",
-  //void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-gaussian.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* outline_out = tensorConvolution(gaussian_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * outline_out_clip = tensorRelu2(outline_out, 0, 255);
-
-    void* motionblur_out = tensorConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1,
-                                       conv_mode, conv_precision);
-    void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255);
-
-    void* emboss_out = tensorConvolution(motionblur_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias);
-    void* result = tensorRelu2(emboss_bias_out, 0, 255);
-    //void* result = gaussian_out;
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    dumpOutput(result);
-    
-  //void* psnr_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-gaussian.bin",
-                                        //float_type,
-                                        //test_batch_size, 1, H, W);
-    computePSNRViolation(result, golden_output, 30);
-
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc
deleted file mode 100644
index 199a29a73af1a98fac31ae55f93c8bc8e7e2d6d4..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n");
-
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 1000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-				   float_type,
-				   test_batch_size, 1, H, W);
-  
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin",
-  					   float_type,
-  					   test_batch_size, 1, H, W);
-
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-
-
-
-  //printTensorValues(input);
-  //printTensorValues(motionblur_filter);
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-      
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255);
-
-    void* motionblur_out = tensorConvolution(emboss_bias_out_clip, motionblur_filter, 4, 4, 1, 1,
-                                       conv_mode, conv_precision);
-    void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255);
-
-    void* outline_out = tensorConvolution(motionblur_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void* result = tensorRelu2(outline_out, 0, 255);
-
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEMO_calib.bin");
-        
-    computePSNRViolation(result, golden_output, 30);
-
-   
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc
deleted file mode 100644
index c2d6d1fb4dd3f8e0fe25db4f2628700a60aa44da..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n");
-
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 1000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255);
-
-    void* outline_out = tensorConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * result = tensorRelu2(outline_out, 0, 255);
-
-    //void* result = gaussian_out;
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEO_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-
-   
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc
deleted file mode 100644
index 9c219f4daac2af708f02a64d97bf84ae36047316..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n");
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 1000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W); 
-  
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin",
-					   float_type,
-					   test_batch_size, 1, H, W);
-
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias);
-    void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255);
-
-    void* outline_out = tensorConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1,
-                                       conv_mode, conv_precision);
-    void * outline_out_clip = tensorRelu2(outline_out, 0, 255);
-
-    void* motionblur_out = tensorConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1,
-                                       conv_mode, conv_precision);
-    void * result = tensorRelu2(motionblur_out, 0, 255);
-
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GEOM_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-
-    
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc
deleted file mode 100644
index 721eb887ca0a6cd9e6cb5c992f7c0559716a1259..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n");
-
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 1000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSM_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* sharpen_out = tensorConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    void * sharpen_out_clip = tensorRelu2(sharpen_out, 0, 255);
-
-    void* motionblur_out = tensorConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-    void * result = tensorRelu2(motionblur_out, 0, 255);
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GSM_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-
-    
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc
deleted file mode 100644
index 19f0210e83939568ae2ca9b198ccfa95be70113e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  //long int test_batch_size = 9145;
-  long int test_batch_size = 1000;
-  long int H = 240;
-  long int W = 300;
-
-  printf("Reading input\n");
-  void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-				   float_type,
-				   test_batch_size, 1, H, W);
-  
-  printf("Reading golden output\n");
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-
-  void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-					  float_type, 1, 1, 3, 3);    
-  void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-					  float_type, 1, 1, 9, 9);    
-  void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-					  float_type, 1, 1, 5, 5);  
-  void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-					  float_type, 1, 1, 1, 1);  
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-    void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255);
-
-    void* sharpen_out = tensorConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    void * sharpen_out_clip = tensorRelu2(sharpen_out, 0, 255);
-
-    void* motionblur_out = tensorConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1,
-				       conv_mode, conv_precision);
-    void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255);
-
-    void* emboss_out = tensorConvolution(motionblur_out_clip, emboss_filter, 2, 2, 1, 1,
-                                       conv_mode, conv_precision);
-    void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias);
-    void* result = tensorRelu2(emboss_bias_out, 0, 255);
-
-
-    //void* result = gaussian_out;
-
-    // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation
-    hpvm_request_tensor(result, 0);
-    
-    //dumpOutput(result, "GSME_calib.bin");
-    
-    computePSNRViolation(result, golden_output, 30);
-
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc
deleted file mode 100644
index ac0d727f39df27763fb964d3846a39a4436ba2ef..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 2;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
- 
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin",
-				     float_type,
-				     test_batch_size, 3, 32, 32);
-
-    void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin",
-					    float_type, 64, 3, 11, 11);  
-    void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin",
-					  float_type, 1, 64, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin",
-					    float_type, 192, 64, 5, 5);  
-    void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin",
-					  float_type, 1, 192, 1, 1);
-
-    void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin",
-					    float_type, 384, 192, 3, 3);  
-    void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin",
-					  float_type, 1, 384, 1, 1);  
-    void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin",
-					    float_type, 256, 384, 3, 3);  
-    void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin",
-					  float_type, 1, 256, 1, 1);
-    void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin",
-					    float_type, 256, 256, 3, 3);  
-    void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin",
-					  float_type, 1, 256, 1, 1);
-  
-    void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin",
-					   float_type, 1, 1, 4096, 10);  
-    void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin",
-					float_type, 1, 10, 1, 1);  
-    
-  
-    clearTensorMap();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performance profiling 
-    startProfiling();
-
-    
-    void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias,
-				    5, 5, 1, 1, 0, 2, 0, -1,1);
-
-    void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, conv2_bias, 
-				    2, 2, 1, 1, 0, 2, 0, -1,1);
-    
-    void* conv3_out = ConvLayer_GPU(conv2_out, conv3_filter, conv3_bias, 
-				    1, 1, 1, 1, 0, 0, 0, -1,1);
-
-    void* conv4_out = ConvLayer_GPU(conv3_out, conv4_filter, conv4_bias, 
-				    1, 1, 1, 1, 0, 0, 0, -1,1);
-
-    void* conv5_out = ConvLayer_GPU(conv4_out, conv5_filter, conv5_bias, 
-				    1, 1, 1, 1, 0, 2, 0, -1,1);
-
-    void* fc1_out = FCLayer_GPU(conv5_out, fc1_weights, fc1_bias, -1, -1,1);
-    
-    void* result = tensorSoftmax(fc1_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc
deleted file mode 100644
index 77b75add2bf858d56dcb2d427958bf0ea5ff20a0..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 10;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				  CUDNN_DATA_FLOAT,
-				  test_batch_size, 1, 28, 28);
-
-    // NOTE: Filter descriptors do NOT have batch size
-    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-    // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-    void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
-					    float_type, 32, 1, 5, 5);    
-    void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
-					  float_type, 1, 32, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
-					    float_type, 64, 32, 5, 5);  
-    void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
-					  float_type, 1, 64, 1, 1);  
-    void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
-					   float_type, 1, 1, 7*7*64, 1024);  
-    void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
-					float_type, 1, 1024, 1, 1);  
-    void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
-					   float_type, 1, 1, 1024, 10);  
-    void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
-					float_type, 1, 10, 1, 1);  
-
-
-  
-    clearTensorMap();
-  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performance profiling 
-    startProfiling();
-      
-    void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias,
-				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, 
-				    conv2_bias, 
-				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias,
-				0, -1,1);
-    
-    void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 
-				0, -1,1);
-
-    void* result = tensorSoftmax(fc2_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc
deleted file mode 100644
index c1345ff24083a0ce20f3274afc74916968be4c06..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				  CUDNN_DATA_FLOAT,
-				  test_batch_size, 1, 28, 28);
-
-    // NOTE: Filter descriptors do NOT have batch size
-    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-    // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-    void* conv1_filter = readTrainedWeights("../model_params/lenet_keras2/conv1.bin",
-					    float_type, 32, 1, 5, 5);    
-    void* conv1_bias = readTrainedWeights("../model_params/lenet_keras2/conv1_bias.bin",
-					  float_type, 1, 32, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/lenet_keras2/conv2.bin",
-					    float_type, 64, 32, 5, 5);  
-    void* conv2_bias = readTrainedWeights("../model_params/lenet_keras2/conv2_bias.bin",
-					  float_type, 1, 64, 1, 1);  
-    void* fc1_weights = readTrainedWeights("../model_params/lenet_keras2/fc1.bin",
-					   float_type, 1, 1, 7*7*64, 1024);  
-    void* fc1_bias = readTrainedWeights("../model_params/lenet_keras2/fc1_bias.bin",
-					float_type, 1, 1024, 1, 1);  
-    void* fc2_weights = readTrainedWeights("../model_params/lenet_keras2/fc2.bin",
-					   float_type, 1, 1, 1024, 10);  
-    void* fc2_bias = readTrainedWeights("../model_params/lenet_keras2/fc2_bias.bin",
-					float_type, 1, 10, 1, 1);  
-
-  
-    clearTensorMap();  
-
-    if(Opentuner_run){
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-    // Start power and performance profiling 
-    startProfiling();
-      
-    void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias,
-				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, 
-				    conv2_bias, 
-				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias,
-				0, -1,1);
-    
-    void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 
-				0, -1,1);
-
-    void* result = tensorSoftmax(fc2_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc
deleted file mode 100644
index df663a81759f9e096e067859f8aa487882d8835f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Test Layer source ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				  CUDNN_DATA_FLOAT,
-				  test_batch_size, 1, 28, 28);
-
-    //void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
-    //					    float_type, 32, 1, 5, 5);    
-    //void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
-    //					  float_type, 1, 32, 1, 1);  
-    //void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
-    //				    float_type, 64, 32, 5, 5);  
-    //void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
-    //				  float_type, 1, 64, 1, 1);  
-    //void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
-    //				   float_type, 1, 1, 7*7*64, 1024);  
-    //void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
-    //				float_type, 1, 1024, 1, 1);  
-
-    void* fc1_weights = readTrainedWeights("../model_params/test_keras/fc1.bin",
-    					   float_type, 1, 1, 784, 500);  
-    void* fc1_bias = readTrainedWeights("../model_params/test_keras/fc1_bias.bin",
-					float_type, 1, 500, 1, 1);  
-
-    void* fc2_weights = readTrainedWeights("../model_params/test_keras/fc2.bin",
-    					   float_type, 1, 1, 500, 10);  
-    void* fc2_bias = readTrainedWeights("../model_params/test_keras/fc2_bias.bin",
-					float_type, 1, 10, 1, 1);  
-
- 
-    clearTensorMap();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performance profiling 
-    startProfiling();
-      
-    //- void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias,
-    //				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    //void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, 
-    //				    conv2_bias, 
-    //				    2, 2, 1, 1, 0, 2, 0, -1,1);
-
-    //void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias,
-    //				0, -1,1);
-
-    void* fc1_out = FCLayer_GPU(input, fc1_weights, fc1_bias, 0, -1,1);
-
-    void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 0, -1,1);
-
-    void* result = tensorSoftmax(fc2_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc
deleted file mode 100644
index 168025d42579e7b2bced6d7c34866e7c275cd739..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  
-  int test_batch_size = 10000;
-
-  uint8_t* labels = readLabels("../model_params/test_keras/test_labels.bin", test_batch_size);
-
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readTrainedWeights("../model_params/cifar_keras/input.bin",
-				     float_type,
-				     test_batch_size, 3, 32, 32);
-   
-    void* conv1_filter = readTrainedWeights("../model_params/cifar10/conv1.bin",
-					    float_type, 32, 3, 3, 3);  
-    void* conv1_bias = readTrainedWeights("../model_params/cifar10/conv1_bias.bin",
-					  float_type, 1, 32, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/cifar10/conv2.bin",
-					    float_type, 64, 32, 3, 3);  
-    void* conv2_bias = readTrainedWeights("../model_params/cifar10/conv2_bias.bin",
-					  float_type, 1, 64, 1, 1);
-    void* conv3_filter = readTrainedWeights("../model_params/cifar10/conv3.bin",
-					    float_type, 128, 64, 3, 3);  
-    void* conv3_bias = readTrainedWeights("../model_params/cifar10/conv3_bias.bin",
-					  float_type, 1, 128, 1, 1);  
-    void* conv4_filter = readTrainedWeights("../model_params/cifar10/conv4.bin",
-					    float_type, 128, 128, 3, 3);  
-    void* conv4_bias = readTrainedWeights("../model_params/cifar10/conv4_bias.bin",
-					  float_type, 1, 128, 1, 1);
-
-  
-    void* fc1_weights = readTrainedWeights("../model_params/test_keras/cifar_fc1.bin",
-					   float_type, 1, 1, 3*32*32, 10);  
-    void* fc1_bias = readTrainedWeights("../model_params/test_keras/cifar_fc1_bias.bin",
-					float_type, 1, 10, 1, 1);  
-    void* fc2_weights = readTrainedWeights("../model_params/test_keras/cifar_fc2.bin",
-					   float_type, 1, 1, 500, 10);  
-    void* fc2_bias = readTrainedWeights("../model_params/test_keras/cifar_fc2_bias.bin",
-					float_type, 1, 10, 1, 1);  
-
-  
-    clearTensorMap();
-  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-    // Start power and performance profiling 
-    startProfiling();
-
-    /*    
-    void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias,
-				    1, 1, 1, 1, 0, 0, 0, -1,1);
-
-    void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, conv2_bias, 
-				    1, 1, 1, 1, 0, 2, 0, -1,1);
-    
-    void* conv3_out = ConvLayer_GPU(conv2_out, conv3_filter, conv3_bias, 
-				    1, 1, 1, 1, 0, 2, 0, -1,1);
-
-    void* conv4_out = ConvLayer_GPU(conv3_out, conv4_filter, conv4_bias, 
-				    1, 1, 1, 1, 0, 2, 0, -1,1);
-    */
-
-    void* fc1_out = FCLayer_GPU(input, fc1_weights, NULL, -1, -1,1);
-    //-- void* fc1_out = tensorGemmGPU(input, fc1_weights);  
-
-    
-    //void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 0, -1,1);
-
-    void* result = tensorSoftmax(fc1_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }    
-  }
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc
deleted file mode 100644
index 7c9583f291ea908c4c89a8b56045e06585a4f83a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-int total_runs = 1;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 1000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    //void* conv1out = tensorConvPerfCuda(input, conv1_filter, 2, 2, 1, 1,
-    //				conv_mode, conv_precision, 2, 2, 1);
-
-    void* conv1out = tensorConvSampSim(input, conv1_filter, 2, 2, 1, 1,
-    				       conv_mode, conv_precision, 4, 0);
-
-    // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv1_tanh = tensorTanh(pool1out);
-
-    // NOTE: input channels have to match between tensor op inputs and outputs 
-    //void* conv2out = tensorConvPerfCuda(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-    //				conv_mode, conv_precision, 1, 2, 1);
-
-    void* conv2out = tensorConvSampSim(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision, 2, 0);
-    
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv2_tanh = tensorTanh(pool2out);
-
-    void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorTanh(gemm1biasout);
-  
-    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    float accuracy = computeAccuracy2(labels, test_batch_size, result);
-    dumpFinalAccuracy(accuracy); 
-
-    
-    //FIXME: remove the comment below to use piped autotuner
-    //dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-  dumpExecutionAccuracies();
-
-  
-}
-
-
-
-int main(int argc, char* argv[]){
-
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc
deleted file mode 100644
index fcbb17f411adaf5e46162a0524efc97c90174506..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc
+++ /dev/null
@@ -1,721 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenetv2_quant/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,32,1,1); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,16,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,16,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,16,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,96,16,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,96,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,96,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,96,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,24,96,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,24,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,144,24,1,1); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,144,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,144,1,3,3); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,144,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,24,144,1,1); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,24,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,24,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,144,24,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,144,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,144,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,144,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,144,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,144,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,192,32,1,1); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,192,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,192,1,3,3); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,192,1,1); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,192,32,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,192,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,192,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,192,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,192,32,1,1); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,192,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,192,1,3,3); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,192,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,64,192,1,1); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,384,64,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,384,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,384,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,384,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,384,64,1,1); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,384,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,384,1,3,3); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,384,1,1); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,384,64,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,384,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,384,1,3,3); 
-  std::string batch_normalization_28_gamma_path =  dir_prefix + std::string("batch_normalization_28_gamma.bin"); 
-  void* batch_normalization_28_gamma =  readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_28_beta_path =  dir_prefix + std::string("batch_normalization_28_beta.bin"); 
-  void* batch_normalization_28_beta =  readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_28_mean_path =  dir_prefix + std::string("batch_normalization_28_mean.bin"); 
-  void* batch_normalization_28_mean =  readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_28_variance_path =  dir_prefix + std::string("batch_normalization_28_variance.bin"); 
-  void* batch_normalization_28_variance =  readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,384,1,1); 
-  std::string batch_normalization_29_gamma_path =  dir_prefix + std::string("batch_normalization_29_gamma.bin"); 
-  void* batch_normalization_29_gamma =  readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_29_beta_path =  dir_prefix + std::string("batch_normalization_29_beta.bin"); 
-  void* batch_normalization_29_beta =  readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_29_mean_path =  dir_prefix + std::string("batch_normalization_29_mean.bin"); 
-  void* batch_normalization_29_mean =  readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_29_variance_path =  dir_prefix + std::string("batch_normalization_29_variance.bin"); 
-  void* batch_normalization_29_variance =  readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,384,64,1,1); 
-  std::string batch_normalization_30_gamma_path =  dir_prefix + std::string("batch_normalization_30_gamma.bin"); 
-  void* batch_normalization_30_gamma =  readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_30_beta_path =  dir_prefix + std::string("batch_normalization_30_beta.bin"); 
-  void* batch_normalization_30_beta =  readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_30_mean_path =  dir_prefix + std::string("batch_normalization_30_mean.bin"); 
-  void* batch_normalization_30_mean =  readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_30_variance_path =  dir_prefix + std::string("batch_normalization_30_variance.bin"); 
-  void* batch_normalization_30_variance =  readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,384,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,384,1,3,3); 
-  std::string batch_normalization_31_gamma_path =  dir_prefix + std::string("batch_normalization_31_gamma.bin"); 
-  void* batch_normalization_31_gamma =  readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_31_beta_path =  dir_prefix + std::string("batch_normalization_31_beta.bin"); 
-  void* batch_normalization_31_beta =  readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_31_mean_path =  dir_prefix + std::string("batch_normalization_31_mean.bin"); 
-  void* batch_normalization_31_mean =  readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,384,1,1); 
-  std::string batch_normalization_31_variance_path =  dir_prefix + std::string("batch_normalization_31_variance.bin"); 
-  void* batch_normalization_31_variance =  readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_22_w_path =  dir_prefix + std::string("conv2d_22_w.bin"); 
-  void* conv2d_22_w =  readTrainedWeights(conv2d_22_w_path.c_str(), 0,96,384,1,1); 
-  std::string batch_normalization_32_gamma_path =  dir_prefix + std::string("batch_normalization_32_gamma.bin"); 
-  void* batch_normalization_32_gamma =  readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_32_beta_path =  dir_prefix + std::string("batch_normalization_32_beta.bin"); 
-  void* batch_normalization_32_beta =  readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_32_mean_path =  dir_prefix + std::string("batch_normalization_32_mean.bin"); 
-  void* batch_normalization_32_mean =  readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_32_variance_path =  dir_prefix + std::string("batch_normalization_32_variance.bin"); 
-  void* batch_normalization_32_variance =  readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,96,1,1); 
-  std::string conv2d_23_w_path =  dir_prefix + std::string("conv2d_23_w.bin"); 
-  void* conv2d_23_w =  readTrainedWeights(conv2d_23_w_path.c_str(), 0,576,96,1,1); 
-  std::string batch_normalization_33_gamma_path =  dir_prefix + std::string("batch_normalization_33_gamma.bin"); 
-  void* batch_normalization_33_gamma =  readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_33_beta_path =  dir_prefix + std::string("batch_normalization_33_beta.bin"); 
-  void* batch_normalization_33_beta =  readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_33_mean_path =  dir_prefix + std::string("batch_normalization_33_mean.bin"); 
-  void* batch_normalization_33_mean =  readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_33_variance_path =  dir_prefix + std::string("batch_normalization_33_variance.bin"); 
-  void* batch_normalization_33_variance =  readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,576,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,576,1,3,3); 
-  std::string batch_normalization_34_gamma_path =  dir_prefix + std::string("batch_normalization_34_gamma.bin"); 
-  void* batch_normalization_34_gamma =  readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_34_beta_path =  dir_prefix + std::string("batch_normalization_34_beta.bin"); 
-  void* batch_normalization_34_beta =  readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_34_mean_path =  dir_prefix + std::string("batch_normalization_34_mean.bin"); 
-  void* batch_normalization_34_mean =  readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_34_variance_path =  dir_prefix + std::string("batch_normalization_34_variance.bin"); 
-  void* batch_normalization_34_variance =  readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,576,1,1); 
-  std::string conv2d_24_w_path =  dir_prefix + std::string("conv2d_24_w.bin"); 
-  void* conv2d_24_w =  readTrainedWeights(conv2d_24_w_path.c_str(), 0,96,576,1,1); 
-  std::string batch_normalization_35_gamma_path =  dir_prefix + std::string("batch_normalization_35_gamma.bin"); 
-  void* batch_normalization_35_gamma =  readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_35_beta_path =  dir_prefix + std::string("batch_normalization_35_beta.bin"); 
-  void* batch_normalization_35_beta =  readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_35_mean_path =  dir_prefix + std::string("batch_normalization_35_mean.bin"); 
-  void* batch_normalization_35_mean =  readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_35_variance_path =  dir_prefix + std::string("batch_normalization_35_variance.bin"); 
-  void* batch_normalization_35_variance =  readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,96,1,1); 
-  std::string conv2d_25_w_path =  dir_prefix + std::string("conv2d_25_w.bin"); 
-  void* conv2d_25_w =  readTrainedWeights(conv2d_25_w_path.c_str(), 0,576,96,1,1); 
-  std::string batch_normalization_36_gamma_path =  dir_prefix + std::string("batch_normalization_36_gamma.bin"); 
-  void* batch_normalization_36_gamma =  readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_36_beta_path =  dir_prefix + std::string("batch_normalization_36_beta.bin"); 
-  void* batch_normalization_36_beta =  readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_36_mean_path =  dir_prefix + std::string("batch_normalization_36_mean.bin"); 
-  void* batch_normalization_36_mean =  readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_36_variance_path =  dir_prefix + std::string("batch_normalization_36_variance.bin"); 
-  void* batch_normalization_36_variance =  readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,576,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,576,1,3,3); 
-  std::string batch_normalization_37_gamma_path =  dir_prefix + std::string("batch_normalization_37_gamma.bin"); 
-  void* batch_normalization_37_gamma =  readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_37_beta_path =  dir_prefix + std::string("batch_normalization_37_beta.bin"); 
-  void* batch_normalization_37_beta =  readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_37_mean_path =  dir_prefix + std::string("batch_normalization_37_mean.bin"); 
-  void* batch_normalization_37_mean =  readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_37_variance_path =  dir_prefix + std::string("batch_normalization_37_variance.bin"); 
-  void* batch_normalization_37_variance =  readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,576,1,1); 
-  std::string conv2d_26_w_path =  dir_prefix + std::string("conv2d_26_w.bin"); 
-  void* conv2d_26_w =  readTrainedWeights(conv2d_26_w_path.c_str(), 0,96,576,1,1); 
-  std::string batch_normalization_38_gamma_path =  dir_prefix + std::string("batch_normalization_38_gamma.bin"); 
-  void* batch_normalization_38_gamma =  readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_38_beta_path =  dir_prefix + std::string("batch_normalization_38_beta.bin"); 
-  void* batch_normalization_38_beta =  readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_38_mean_path =  dir_prefix + std::string("batch_normalization_38_mean.bin"); 
-  void* batch_normalization_38_mean =  readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,96,1,1); 
-  std::string batch_normalization_38_variance_path =  dir_prefix + std::string("batch_normalization_38_variance.bin"); 
-  void* batch_normalization_38_variance =  readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,96,1,1); 
-  std::string conv2d_27_w_path =  dir_prefix + std::string("conv2d_27_w.bin"); 
-  void* conv2d_27_w =  readTrainedWeights(conv2d_27_w_path.c_str(), 0,576,96,1,1); 
-  std::string batch_normalization_39_gamma_path =  dir_prefix + std::string("batch_normalization_39_gamma.bin"); 
-  void* batch_normalization_39_gamma =  readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_39_beta_path =  dir_prefix + std::string("batch_normalization_39_beta.bin"); 
-  void* batch_normalization_39_beta =  readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_39_mean_path =  dir_prefix + std::string("batch_normalization_39_mean.bin"); 
-  void* batch_normalization_39_mean =  readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_39_variance_path =  dir_prefix + std::string("batch_normalization_39_variance.bin"); 
-  void* batch_normalization_39_variance =  readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,576,1,1); 
-  std::string depthwise_conv2d_14_w_path =  dir_prefix + std::string("depthwise_conv2d_14_w.bin"); 
-  void* depthwise_conv2d_14_w =  readTrainedWeights(depthwise_conv2d_14_w_path.c_str(), 0,576,1,3,3); 
-  std::string batch_normalization_40_gamma_path =  dir_prefix + std::string("batch_normalization_40_gamma.bin"); 
-  void* batch_normalization_40_gamma =  readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_40_beta_path =  dir_prefix + std::string("batch_normalization_40_beta.bin"); 
-  void* batch_normalization_40_beta =  readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_40_mean_path =  dir_prefix + std::string("batch_normalization_40_mean.bin"); 
-  void* batch_normalization_40_mean =  readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,576,1,1); 
-  std::string batch_normalization_40_variance_path =  dir_prefix + std::string("batch_normalization_40_variance.bin"); 
-  void* batch_normalization_40_variance =  readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,576,1,1); 
-  std::string conv2d_28_w_path =  dir_prefix + std::string("conv2d_28_w.bin"); 
-  void* conv2d_28_w =  readTrainedWeights(conv2d_28_w_path.c_str(), 0,160,576,1,1); 
-  std::string batch_normalization_41_gamma_path =  dir_prefix + std::string("batch_normalization_41_gamma.bin"); 
-  void* batch_normalization_41_gamma =  readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_41_beta_path =  dir_prefix + std::string("batch_normalization_41_beta.bin"); 
-  void* batch_normalization_41_beta =  readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_41_mean_path =  dir_prefix + std::string("batch_normalization_41_mean.bin"); 
-  void* batch_normalization_41_mean =  readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_41_variance_path =  dir_prefix + std::string("batch_normalization_41_variance.bin"); 
-  void* batch_normalization_41_variance =  readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,160,1,1); 
-  std::string conv2d_29_w_path =  dir_prefix + std::string("conv2d_29_w.bin"); 
-  void* conv2d_29_w =  readTrainedWeights(conv2d_29_w_path.c_str(), 0,960,160,1,1); 
-  std::string batch_normalization_42_gamma_path =  dir_prefix + std::string("batch_normalization_42_gamma.bin"); 
-  void* batch_normalization_42_gamma =  readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_42_beta_path =  dir_prefix + std::string("batch_normalization_42_beta.bin"); 
-  void* batch_normalization_42_beta =  readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_42_mean_path =  dir_prefix + std::string("batch_normalization_42_mean.bin"); 
-  void* batch_normalization_42_mean =  readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_42_variance_path =  dir_prefix + std::string("batch_normalization_42_variance.bin"); 
-  void* batch_normalization_42_variance =  readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,960,1,1); 
-  std::string depthwise_conv2d_15_w_path =  dir_prefix + std::string("depthwise_conv2d_15_w.bin"); 
-  void* depthwise_conv2d_15_w =  readTrainedWeights(depthwise_conv2d_15_w_path.c_str(), 0,960,1,3,3); 
-  std::string batch_normalization_43_gamma_path =  dir_prefix + std::string("batch_normalization_43_gamma.bin"); 
-  void* batch_normalization_43_gamma =  readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_43_beta_path =  dir_prefix + std::string("batch_normalization_43_beta.bin"); 
-  void* batch_normalization_43_beta =  readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_43_mean_path =  dir_prefix + std::string("batch_normalization_43_mean.bin"); 
-  void* batch_normalization_43_mean =  readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_43_variance_path =  dir_prefix + std::string("batch_normalization_43_variance.bin"); 
-  void* batch_normalization_43_variance =  readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,960,1,1); 
-  std::string conv2d_30_w_path =  dir_prefix + std::string("conv2d_30_w.bin"); 
-  void* conv2d_30_w =  readTrainedWeights(conv2d_30_w_path.c_str(), 0,160,960,1,1); 
-  std::string batch_normalization_44_gamma_path =  dir_prefix + std::string("batch_normalization_44_gamma.bin"); 
-  void* batch_normalization_44_gamma =  readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_44_beta_path =  dir_prefix + std::string("batch_normalization_44_beta.bin"); 
-  void* batch_normalization_44_beta =  readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_44_mean_path =  dir_prefix + std::string("batch_normalization_44_mean.bin"); 
-  void* batch_normalization_44_mean =  readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_44_variance_path =  dir_prefix + std::string("batch_normalization_44_variance.bin"); 
-  void* batch_normalization_44_variance =  readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,160,1,1); 
-  std::string conv2d_31_w_path =  dir_prefix + std::string("conv2d_31_w.bin"); 
-  void* conv2d_31_w =  readTrainedWeights(conv2d_31_w_path.c_str(), 0,960,160,1,1); 
-  std::string batch_normalization_45_gamma_path =  dir_prefix + std::string("batch_normalization_45_gamma.bin"); 
-  void* batch_normalization_45_gamma =  readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_45_beta_path =  dir_prefix + std::string("batch_normalization_45_beta.bin"); 
-  void* batch_normalization_45_beta =  readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_45_mean_path =  dir_prefix + std::string("batch_normalization_45_mean.bin"); 
-  void* batch_normalization_45_mean =  readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_45_variance_path =  dir_prefix + std::string("batch_normalization_45_variance.bin"); 
-  void* batch_normalization_45_variance =  readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,960,1,1); 
-  std::string depthwise_conv2d_16_w_path =  dir_prefix + std::string("depthwise_conv2d_16_w.bin"); 
-  void* depthwise_conv2d_16_w =  readTrainedWeights(depthwise_conv2d_16_w_path.c_str(), 0,960,1,3,3); 
-  std::string batch_normalization_46_gamma_path =  dir_prefix + std::string("batch_normalization_46_gamma.bin"); 
-  void* batch_normalization_46_gamma =  readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_46_beta_path =  dir_prefix + std::string("batch_normalization_46_beta.bin"); 
-  void* batch_normalization_46_beta =  readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_46_mean_path =  dir_prefix + std::string("batch_normalization_46_mean.bin"); 
-  void* batch_normalization_46_mean =  readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_46_variance_path =  dir_prefix + std::string("batch_normalization_46_variance.bin"); 
-  void* batch_normalization_46_variance =  readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,960,1,1); 
-  std::string conv2d_32_w_path =  dir_prefix + std::string("conv2d_32_w.bin"); 
-  void* conv2d_32_w =  readTrainedWeights(conv2d_32_w_path.c_str(), 0,160,960,1,1); 
-  std::string batch_normalization_47_gamma_path =  dir_prefix + std::string("batch_normalization_47_gamma.bin"); 
-  void* batch_normalization_47_gamma =  readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_47_beta_path =  dir_prefix + std::string("batch_normalization_47_beta.bin"); 
-  void* batch_normalization_47_beta =  readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_47_mean_path =  dir_prefix + std::string("batch_normalization_47_mean.bin"); 
-  void* batch_normalization_47_mean =  readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,160,1,1); 
-  std::string batch_normalization_47_variance_path =  dir_prefix + std::string("batch_normalization_47_variance.bin"); 
-  void* batch_normalization_47_variance =  readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,160,1,1); 
-  std::string conv2d_33_w_path =  dir_prefix + std::string("conv2d_33_w.bin"); 
-  void* conv2d_33_w =  readTrainedWeights(conv2d_33_w_path.c_str(), 0,960,160,1,1); 
-  std::string batch_normalization_48_gamma_path =  dir_prefix + std::string("batch_normalization_48_gamma.bin"); 
-  void* batch_normalization_48_gamma =  readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_48_beta_path =  dir_prefix + std::string("batch_normalization_48_beta.bin"); 
-  void* batch_normalization_48_beta =  readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_48_mean_path =  dir_prefix + std::string("batch_normalization_48_mean.bin"); 
-  void* batch_normalization_48_mean =  readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_48_variance_path =  dir_prefix + std::string("batch_normalization_48_variance.bin"); 
-  void* batch_normalization_48_variance =  readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,960,1,1); 
-  std::string depthwise_conv2d_17_w_path =  dir_prefix + std::string("depthwise_conv2d_17_w.bin"); 
-  void* depthwise_conv2d_17_w =  readTrainedWeights(depthwise_conv2d_17_w_path.c_str(), 0,960,1,3,3); 
-  std::string batch_normalization_49_gamma_path =  dir_prefix + std::string("batch_normalization_49_gamma.bin"); 
-  void* batch_normalization_49_gamma =  readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_49_beta_path =  dir_prefix + std::string("batch_normalization_49_beta.bin"); 
-  void* batch_normalization_49_beta =  readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_49_mean_path =  dir_prefix + std::string("batch_normalization_49_mean.bin"); 
-  void* batch_normalization_49_mean =  readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,960,1,1); 
-  std::string batch_normalization_49_variance_path =  dir_prefix + std::string("batch_normalization_49_variance.bin"); 
-  void* batch_normalization_49_variance =  readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,960,1,1); 
-  std::string conv2d_34_w_path =  dir_prefix + std::string("conv2d_34_w.bin"); 
-  void* conv2d_34_w =  readTrainedWeights(conv2d_34_w_path.c_str(), 0,320,960,1,1); 
-  std::string batch_normalization_50_gamma_path =  dir_prefix + std::string("batch_normalization_50_gamma.bin"); 
-  void* batch_normalization_50_gamma =  readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,320,1,1); 
-  std::string batch_normalization_50_beta_path =  dir_prefix + std::string("batch_normalization_50_beta.bin"); 
-  void* batch_normalization_50_beta =  readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,320,1,1); 
-  std::string batch_normalization_50_mean_path =  dir_prefix + std::string("batch_normalization_50_mean.bin"); 
-  void* batch_normalization_50_mean =  readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,320,1,1); 
-  std::string batch_normalization_50_variance_path =  dir_prefix + std::string("batch_normalization_50_variance.bin"); 
-  void* batch_normalization_50_variance =  readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,320,1,1); 
-  std::string conv2d_35_w_path =  dir_prefix + std::string("conv2d_35_w.bin"); 
-  void* conv2d_35_w =  readTrainedWeights(conv2d_35_w_path.c_str(), 0,1280,320,1,1); 
-  std::string batch_normalization_51_gamma_path =  dir_prefix + std::string("batch_normalization_51_gamma.bin"); 
-  void* batch_normalization_51_gamma =  readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,1280,1,1); 
-  std::string batch_normalization_51_beta_path =  dir_prefix + std::string("batch_normalization_51_beta.bin"); 
-  void* batch_normalization_51_beta =  readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,1280,1,1); 
-  std::string batch_normalization_51_mean_path =  dir_prefix + std::string("batch_normalization_51_mean.bin"); 
-  void* batch_normalization_51_mean =  readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,1280,1,1); 
-  std::string batch_normalization_51_variance_path =  dir_prefix + std::string("batch_normalization_51_variance.bin"); 
-  void* batch_normalization_51_variance =  readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,1280,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,5120,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 10000; 
-  int batch_size = 500; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_5 = tensorBatchNorm(var_4, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorBatchNorm(var_7, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_9 = tensorConvolution(var_8, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_10 = tensorBatchNorm(var_9, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_11 = tensorRelu(var_10); 
-    void* var_13 = tensorConvolution(var_11, depthwise_conv2d_2_w, 1, 1, 1, 1, 1, 96); 
-    void* var_14 = tensorBatchNorm(var_13, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_15 = tensorRelu(var_14); 
-    void* var_16 = tensorConvolution(var_15, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_17 = tensorBatchNorm(var_16, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_18 = tensorConvolution(var_17, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_20 = tensorRelu(var_19); 
-    void* var_22 = tensorConvolution(var_20, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 144); 
-    void* var_23 = tensorBatchNorm(var_22, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_24 = tensorRelu(var_23); 
-    void* var_25 = tensorConvolution(var_24, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_26 = tensorBatchNorm(var_25, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_27 = tensorAdd(var_17, var_26); 
-    void* var_28 = tensorConvolution(var_27, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-    void* var_29 = tensorBatchNorm(var_28, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_32 = tensorConvolution(var_30, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 144); 
-    void* var_33 = tensorBatchNorm(var_32, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_34 = tensorRelu(var_33); 
-    void* var_35 = tensorConvolution(var_34, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-    void* var_36 = tensorBatchNorm(var_35, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_37 = tensorConvolution(var_36, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-    void* var_38 = tensorBatchNorm(var_37, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_39 = tensorRelu(var_38); 
-    void* var_41 = tensorConvolution(var_39, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 192); 
-    void* var_42 = tensorBatchNorm(var_41, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_44 = tensorConvolution(var_43, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorBatchNorm(var_44, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-    void* var_46 = tensorAdd(var_36, var_45); 
-    void* var_47 = tensorConvolution(var_46, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-    void* var_48 = tensorBatchNorm(var_47, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-    void* var_49 = tensorRelu(var_48); 
-    void* var_51 = tensorConvolution(var_49, depthwise_conv2d_6_w, 1, 1, 1, 1, 1, 192); 
-    void* var_52 = tensorBatchNorm(var_51, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-    void* var_53 = tensorRelu(var_52); 
-    void* var_54 = tensorConvolution(var_53, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-    void* var_55 = tensorBatchNorm(var_54, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-    void* var_56 = tensorAdd(var_46, var_55); 
-    void* var_57 = tensorConvolution(var_56, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-    void* var_58 = tensorBatchNorm(var_57, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-    void* var_59 = tensorRelu(var_58); 
-    void* var_61 = tensorConvolution(var_59, depthwise_conv2d_7_w, 1, 1, 2, 2, 1, 192); 
-    void* var_62 = tensorBatchNorm(var_61, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-    void* var_63 = tensorRelu(var_62); 
-    void* var_64 = tensorConvolution(var_63, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-    void* var_65 = tensorBatchNorm(var_64, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-    void* var_66 = tensorConvolution(var_65, conv2d_15_w, 0, 0, 1, 1, 1, 1); 
-    void* var_67 = tensorBatchNorm(var_66, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-    void* var_68 = tensorRelu(var_67); 
-    void* var_70 = tensorConvolution(var_68, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 384); 
-    void* var_71 = tensorBatchNorm(var_70, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-    void* var_72 = tensorRelu(var_71); 
-    void* var_73 = tensorConvolution(var_72, conv2d_16_w, 0, 0, 1, 1, 1, 1); 
-    void* var_74 = tensorBatchNorm(var_73, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-    void* var_75 = tensorAdd(var_65, var_74); 
-    void* var_76 = tensorConvolution(var_75, conv2d_17_w, 0, 0, 1, 1, 1, 1); 
-    void* var_77 = tensorBatchNorm(var_76, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-    void* var_78 = tensorRelu(var_77); 
-    void* var_80 = tensorConvolution(var_78, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 384); 
-    void* var_81 = tensorBatchNorm(var_80, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-    void* var_82 = tensorRelu(var_81); 
-    void* var_83 = tensorConvolution(var_82, conv2d_18_w, 0, 0, 1, 1, 1, 1); 
-    void* var_84 = tensorBatchNorm(var_83, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-    void* var_85 = tensorAdd(var_75, var_84); 
-    void* var_86 = tensorConvolution(var_85, conv2d_19_w, 0, 0, 1, 1, 1, 1); 
-    void* var_87 = tensorBatchNorm(var_86, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-    void* var_88 = tensorRelu(var_87); 
-    void* var_90 = tensorConvolution(var_88, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 384); 
-    void* var_91 = tensorBatchNorm(var_90, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); 
-    void* var_92 = tensorRelu(var_91); 
-    void* var_93 = tensorConvolution(var_92, conv2d_20_w, 0, 0, 1, 1, 1, 1); 
-    void* var_94 = tensorBatchNorm(var_93, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); 
-    void* var_95 = tensorAdd(var_85, var_94); 
-    void* var_97 = tensorConvolution(var_95, conv2d_21_w, 0, 0, 1, 1, 1, 1); 
-    void* var_98 = tensorBatchNorm(var_97, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); 
-    void* var_99 = tensorRelu(var_98); 
-    void* var_101 = tensorConvolution(var_99, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 384); 
-    void* var_102 = tensorBatchNorm(var_101, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); 
-    void* var_103 = tensorRelu(var_102); 
-    void* var_104 = tensorConvolution(var_103, conv2d_22_w, 0, 0, 1, 1, 1, 1); 
-    void* var_105 = tensorBatchNorm(var_104, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 0.001); 
-    void* var_106 = tensorConvolution(var_105, conv2d_23_w, 0, 0, 1, 1, 1, 1); 
-    void* var_107 = tensorBatchNorm(var_106, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); 
-    void* var_108 = tensorRelu(var_107); 
-    void* var_110 = tensorConvolution(var_108, depthwise_conv2d_12_w, 1, 1, 1, 1, 1, 576); 
-    void* var_111 = tensorBatchNorm(var_110, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); 
-    void* var_112 = tensorRelu(var_111); 
-    void* var_113 = tensorConvolution(var_112, conv2d_24_w, 0, 0, 1, 1, 1, 1); 
-    void* var_114 = tensorBatchNorm(var_113, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); 
-    void* var_115 = tensorAdd(var_105, var_114); 
-    void* var_116 = tensorConvolution(var_115, conv2d_25_w, 0, 0, 1, 1, 1, 1); 
-    void* var_117 = tensorBatchNorm(var_116, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); 
-    void* var_118 = tensorRelu(var_117); 
-    void* var_120 = tensorConvolution(var_118, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 576); 
-    void* var_121 = tensorBatchNorm(var_120, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 0.001); 
-    void* var_122 = tensorRelu(var_121); 
-    void* var_123 = tensorConvolution(var_122, conv2d_26_w, 0, 0, 1, 1, 1, 1); 
-    void* var_124 = tensorBatchNorm(var_123, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); 
-    void* var_125 = tensorAdd(var_115, var_124); 
-    void* var_127 = tensorConvolution(var_125, conv2d_27_w, 0, 0, 1, 1, 1, 1); 
-    void* var_128 = tensorBatchNorm(var_127, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); 
-    void* var_129 = tensorRelu(var_128); 
-    void* var_131 = tensorConvolution(var_129, depthwise_conv2d_14_w, 1, 1, 2, 2, 1, 576); 
-    void* var_132 = tensorBatchNorm(var_131, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); 
-    void* var_133 = tensorRelu(var_132); 
-    void* var_134 = tensorConvolution(var_133, conv2d_28_w, 0, 0, 1, 1, 1, 1); 
-    void* var_135 = tensorBatchNorm(var_134, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); 
-    void* var_136 = tensorConvolution(var_135, conv2d_29_w, 0, 0, 1, 1, 1, 1); 
-    void* var_137 = tensorBatchNorm(var_136, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); 
-    void* var_138 = tensorRelu(var_137); 
-    void* var_140 = tensorConvolution(var_138, depthwise_conv2d_15_w, 1, 1, 1, 1, 1, 960); 
-    void* var_141 = tensorBatchNorm(var_140, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); 
-    void* var_142 = tensorRelu(var_141); 
-    void* var_143 = tensorConvolution(var_142, conv2d_30_w, 0, 0, 1, 1, 1, 1); 
-    void* var_144 = tensorBatchNorm(var_143, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); 
-    void* var_145 = tensorAdd(var_135, var_144); 
-    void* var_146 = tensorConvolution(var_145, conv2d_31_w, 0, 0, 1, 1, 1, 1); 
-    void* var_147 = tensorBatchNorm(var_146, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, batch_normalization_45_variance, 0.001); 
-    void* var_148 = tensorRelu(var_147); 
-    void* var_150 = tensorConvolution(var_148, depthwise_conv2d_16_w, 1, 1, 1, 1, 1, 960); 
-    void* var_151 = tensorBatchNorm(var_150, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); 
-    void* var_152 = tensorRelu(var_151); 
-    void* var_153 = tensorConvolution(var_152, conv2d_32_w, 0, 0, 1, 1, 1, 1); 
-    void* var_154 = tensorBatchNorm(var_153, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); 
-    void* var_155 = tensorAdd(var_145, var_154); 
-    void* var_157 = tensorConvolution(var_155, conv2d_33_w, 0, 0, 1, 1, 1, 1); 
-    void* var_158 = tensorBatchNorm(var_157, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); 
-    void* var_159 = tensorRelu(var_158); 
-    void* var_161 = tensorConvolution(var_159, depthwise_conv2d_17_w, 1, 1, 1, 1, 1, 960); 
-    void* var_162 = tensorBatchNorm(var_161, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); 
-    void* var_163 = tensorRelu(var_162); 
-    void* var_164 = tensorConvolution(var_163, conv2d_34_w, 0, 0, 1, 1, 1, 1); 
-    void* var_165 = tensorBatchNorm(var_164, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); 
-    void* var_167 = tensorConvolution(var_165, conv2d_35_w, 0, 0, 1, 1, 1, 1); 
-    void* var_168 = tensorBatchNorm(var_167, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); 
-    void* var_169 = tensorRelu(var_168); 
-    void* var_170 = tensorPooling(var_169,1,2,2,0,0,2,2); 
-    void* var_172 = tensorGemmGPU(var_170, dense_1_w); 
-    void* var_173 = tensorAdd(var_172, dense_1_b); 
-    void* var_174 = tensorSoftmax(var_173); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_174); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc
deleted file mode 100644
index 50df874874592a94238e596189b6a477fb66f05f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-// Per tensor operation
-
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  startMemTracking();
-
-  int total_runs = 10;
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-    for(int i = 0; i < batch_count; i++){
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-      // FIRST Tensor Runtime CALL
-      profiler.resume_profiler();
-      void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv1out, conv1_bias); 
-      void* conv1_tanh = tensorTanh(conv1out);
-
-      // 2nd Layer
-      void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv2out, conv2_bias); 
-      void* conv2_tanh = tensorTanh(conv2out);
-      void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-      // 3rd Layer
-      void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv3out, conv3_bias); 
-      void* conv3_tanh = tensorTanh(conv3out);
-
-      // 4th Layer
-      void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv4out, conv4_bias); 
-      void* conv4_tanh = tensorTanh(conv4out);
-      void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-      // 5th Layer
-      void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv5out, conv5_bias); 
-      void* conv5_tanh = tensorTanh(conv5out);
-
-      // 6th Layer
-      void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv6out, conv6_bias); 
-      void* conv6_tanh = tensorTanh(conv6out);
-      void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-      // final FC Layer
-      void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); 
-      void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-      void* result = tensorSoftmax(gemm1biasout);
-
-      profiler.pause_profiler();
-      auto time_energy = profiler.get_time_energy();
-      total_time += time_energy.first;
-      total_energy += time_energy.second;
-
-      profiler.reset();
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, result); 
-      final_accuracy += accuracy;
-    
-      freeBatchMemory();
-    }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-  
-  stopProfiling();
-  final_accuracy = (final_accuracy / batch_count) / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc
deleted file mode 100644
index f95a7bda4fc581e4c40d4882304156f2420f22a5..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc
+++ /dev/null
@@ -1,262 +0,0 @@
-// Per tensor operation
-
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-void add_data(std::unordered_map<std::string, std::pair<double, double> >& total_time_energies, Profiler& profiler, const std::string& op_name){
-    profiler.pause_profiler();
-    auto time_energy = profiler.get_time_energy();
-
-    auto itr = total_time_energies.find(op_name);
-    if (itr == total_time_energies.end()){
-        total_time_energies.insert(std::make_pair(op_name, time_energy));
-    } else {
-        itr->second.first += time_energy.first;
-		itr->second.second += time_energy.second;
-    }
-    profiler.reset();
-}
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  std::ofstream online_profiler_output;
-  online_profiler_output.open("online_output.txt");
-
-  startMemTracking();
-
-  // NOTE: CHANGED INPUT TO STANDARDIZE
-  int total_runs = 50; // FOR NOW 100;
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  // Get the total time and energy per tensor per run 
-  std::unordered_map<std::string, std::pair<double, double> > total_time_energies;
-
-  for(int i = 0; i < total_runs; i++){
-    for(int i = 0; i < batch_count; i++){
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-      // FIRST Tensor Runtime CALL
-      profiler.resume_profiler();
-      void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv1");
-       
-      profiler.resume_profiler();
-      tensorAdd(conv1out, conv1_bias); 
-      add_data(total_time_energies, profiler, "Add1");
-
-      profiler.resume_profiler();
-      void* conv1_tanh = tensorTanh(conv1out);
-      add_data(total_time_energies, profiler, "Tanh1");
-
-      // 2nd Layer
-      profiler.resume_profiler();
-      void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv2");
-
-      profiler.resume_profiler();
-      tensorAdd(conv2out, conv2_bias); 
-      add_data(total_time_energies, profiler, "Add2");
-
-      profiler.resume_profiler();
-      void* conv2_tanh = tensorTanh(conv2out);
-      add_data(total_time_energies, profiler, "Tanh2");
-
-      profiler.resume_profiler();
-      void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-      add_data(total_time_energies, profiler, "Pool1");
-
-      // 3rd Layer
-      profiler.resume_profiler();
-      void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv3");
-
-      profiler.resume_profiler();
-      tensorAdd(conv3out, conv3_bias); 
-      add_data(total_time_energies, profiler, "Add3");
-
-      profiler.resume_profiler();
-      void* conv3_tanh = tensorTanh(conv3out);
-      add_data(total_time_energies, profiler, "Tanh3");
-
-      // 4th Layer
-      profiler.resume_profiler();
-      void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv4");
-
-      profiler.resume_profiler();
-      tensorAdd(conv4out, conv4_bias); 
-      add_data(total_time_energies, profiler, "Add4");
-
-      profiler.resume_profiler();
-      void* conv4_tanh = tensorTanh(conv4out);
-      add_data(total_time_energies, profiler, "Tanh4");
-
-      profiler.resume_profiler();
-      void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-      add_data(total_time_energies, profiler, "Pool2");
-
-      // 5th Layer
-      profiler.resume_profiler();
-      void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv5");
-
-      profiler.resume_profiler();
-      tensorAdd(conv5out, conv5_bias); 
-      add_data(total_time_energies, profiler, "Add5");
-
-      profiler.resume_profiler();
-      void* conv5_tanh = tensorTanh(conv5out);
-      add_data(total_time_energies, profiler, "Tanh5");
-
-      // 6th Layer
-      profiler.resume_profiler();
-      void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      add_data(total_time_energies, profiler, "Conv6");
-
-      profiler.resume_profiler();
-      tensorAdd(conv6out, conv6_bias); 
-      add_data(total_time_energies, profiler, "Add6");
-
-      profiler.resume_profiler();
-      void* conv6_tanh = tensorTanh(conv6out);
-      add_data(total_time_energies, profiler, "Tanh6");
-
-      profiler.resume_profiler();
-      void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-      add_data(total_time_energies, profiler, "Pool3");
-
-      // final FC Layer
-      profiler.resume_profiler();
-      void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); 
-      add_data(total_time_energies, profiler, "Mul1"); // ASSUMING that this is mul1
-
-      std::cout<<"-----------------------------------ADD 7--------------------------------\n";
-      profiler.resume_profiler();
-      void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-      add_data(total_time_energies, profiler, "Add7");
-      std::cout<<"-----------------------------------ADD 7 ENDS --------------------------------\n";
-
-      profiler.resume_profiler();
-      void* result = tensorSoftmax(gemm1biasout);
-      add_data(total_time_energies, profiler, "Softmax1");
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, result); 
-      final_accuracy += accuracy;
-    
-      freeBatchMemory();
-    }
-  }
-  profiler.stop_profiler();
-  
-  stopProfiling();
-  //online_profiler_output << "Total time: " << total_time << ", " << total_energy << "\n";
-  // Now compute the averages across batches
-  std::ofstream ofs;
-  std::string arr[] = {"Add1", "Add2", "Add3", "Add4", "Add5", "Add6", "Add7",
-                       "Conv1", "Conv2", "Conv3", "Conv4", "Conv5", "Conv6",
-                       "Mul1",
-                       "Pool1", "Pool2", "Pool3",
-                       "Softmax1",
-                       "Tanh1", "Tanh2", "Tanh3", "Tanh4", "Tanh5", "Tanh6"};
-  ofs.open("online_profiler_tensor_data.txt");
-  std::vector<std::string> ordered_keys(std::begin(arr), std::end(arr));
-  for (const std::string& key : ordered_keys){
-    const auto& data_pair = total_time_energies[key];
-    ofs << key << ": " << data_pair.first / total_runs << "\t" << data_pair.second / total_runs << '\n';
-    std::cout<< key << ": " << data_pair.first / total_runs << "\t" << data_pair.second / total_runs << '\n';
-  }
-   
-  /*
-  ofs.open("online_profiler_tensor_data.txt");
-  for (const auto& tensor_data : total_time_energies){
-    ofs << tensor_data.first << ": " << tensor_data.second.first / total_runs << "\t" << tensor_data.second.second / total_runs << '\n';
-  }*/
-  ofs.close();
-  final_accuracy = (final_accuracy / batch_count) / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-  online_profiler_output.close();
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc
deleted file mode 100644
index 94cef7fba14e0b9d1d9ec72b508ccd11cb560a87..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv0.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv_bias0.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv3.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv_bias3.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv6.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv_bias6.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv7.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv_bias7.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv8.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv_bias8.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("fc12.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("fc_bias12.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  for(int i = 0; i < total_runs; i++){
-      for(int i = 0; i < batch_count; i++){
-
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);    
-
-        profiler.resume_profiler();
-        void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); 
-        void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-        void* var_2 = tensorTanh(var_1); 
-        void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
-        void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); 
-        void* var_6 = tensorAdd(var_5, conv2d_2_b); 
-        void* var_7 = tensorTanh(var_6); 
-        void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); 
-        void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-        void* var_12 = tensorTanh(var_11); 
-        void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_14 = tensorAdd(var_13, conv2d_4_b); 
-        void* var_15 = tensorTanh(var_14); 
-        void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-        void* var_18 = tensorTanh(var_17); 
-        void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); 
-        void* var_22 = tensorGemmGPU(var_19, dense_1_w); 
-        void* var_23 = tensorAdd(var_22, dense_1_b); 
-        void* var_24 = tensorSoftmax(var_23); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_24); 
-        final_accuracy += accuracy;
-        
-        freeBatchMemory();
-      } 
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp
deleted file mode 100644
index bd7b8e86644ae04d684bb1d777fdb914f943d62f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <algorithm>
-#include <cassert>
-#include <fstream>
-#include <string>
-
-const size_t n_channels = 3;
-
-Tensor *gaussianFilter_(float div) {
-  std::vector<float> gauss_data = {1,  4, 6,  4,  1,  4, 16, 24, 16,
-                                   4,  6, 24, 36, 24, 6, 4,  16, 24,
-                                   16, 4, 1,  4,  6,  4, 1};
-  for (float &f : gauss_data)
-    f /= div;
-  return (Tensor *)createFilterFromData(
-      CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1);
-}
-
-Tensor *gaussianFilter() { return gaussianFilter_(16.0); }
-
-void *normalize(void *image) {
-  auto *max_1D = tensorReduce(image, 2, MathOp::Max);
-  auto *max = tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = tensorMap2(MathOp::Div, image, max);
-  return img_norm;
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-void *sharpen(void *image) {
-  void *gaussian = gaussianFilter();
-  forward_reshape(image);
-  void *blurred =
-      tensorConvApprox(image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
-  backward_reshape(blurred);
-  backward_reshape(image);
-  void *blurred_norm = normalize(blurred);
-  void *image_norm = normalize(image);
-  void *ret = tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm);
-  return ret;
-}
-
-void *main_procedure(void *fg, void *bg) {
-  void *g_bg = sharpen(bg);
-  void *g_fg = sharpen(fg);
-  void *ret = tensorMap2(MathOp::Blend2, g_bg, g_fg);
-  return ret;
-}
-
-extern void llvm_hpvm_initTensorRt(int gpuid);
-
-extern void llvm_hpvm_clearRuntimeController();
-
-const size_t batch_size = 500;
-
-int main() {
-  const char *input1_path = "../model_params/image_processing_5k";
-  const char *input2_path = "../model_params/image_processing_5k_shuffled";
-
-  llvm_hpvm_initTensorRt(0);
-  startMemTracking();
-  size_t bstart = 0;
-  startProfiling();
-  while (true) {
-    auto *background = readDataSet(input1_path, bstart, batch_size, n_channels),
-         *foreground = readDataSet(input2_path, bstart, batch_size, n_channels);
-    if (!background || !foreground)
-      break;
-
-    auto *result = main_procedure(foreground, background);
-    bstart += batch_size;
-    freeBatchMemory();
-  }
-  stopProfiling();
-  llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp
deleted file mode 100644
index 233f317e611e79f66038795b44b295fd3683ae4a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <vector>
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-std::pair<Tensor *, Tensor *> getSobelKernels() {
-  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-  auto *t1 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-  auto *t2 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-  return std::make_pair(t1, t2);
-}
-
-void *main_procedure(void *dataset) {
-  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
-  Tensor *kernel_x, *kernel_y;
-  std::tie(kernel_x, kernel_y) = getSobelKernels();
-
-  // 0. Grayscale
-  auto *summed_image = tensorReduce(dataset, 1, MathOp::Add, 0.0f);
-  auto *grayscale_image = tensorMap1(MathOp::Avg3, summed_image);
-  // 1. Denoise
-  auto *image2 =
-      tensorConvApprox(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
-  // 2. Get edge gradient / direction
-  auto *grad_x =
-      tensorConvApprox(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-  auto *grad_y =
-      tensorConvApprox(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-  auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y);
-  // 2.5. Normalize grad magnitude
-  auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max, 0.0f);
-  auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max, 0.0f);
-  auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max);
-  return grad_mag_norm;
-}
-
-extern void llvm_hpvm_initTensorRt(int gpuid);
-
-extern void llvm_hpvm_clearRuntimeController();
-
-const size_t batch_size = 500;
-
-int main(int argc, char *argv[]) {
-  const char *input1_path = "../model_params/image_processing_5k";
-
-  llvm_hpvm_initTensorRt(0);
-  size_t bstart = 0;
-  startMemTracking();
-  startProfiling();
-  while (true) {
-    Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-    if (batch == nullptr) // If end of dataset
-      break;
-
-    auto *result = main_procedure(batch);
-    bstart += batch_size;
-    freeBatchMemory();
-  }
-  stopProfiling();
-  llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp
deleted file mode 100644
index 4c13e1587aa95a2eb4d1e645fa770f4b09d71dac..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <cmath>
-#include <iostream>
-#include <thrust/complex.h>
-#include <vector>
-
-const size_t batch_size = 250, total_max = 3000;
-const size_t n_colors = N_RGB_CHAN;
-const float psnr_threshold = 25.0;
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-static void *normalize(void *image) {
-  auto *max_1D = tensorReduce(image, 2, MathOp::Max);
-  auto *max = tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = tensorMap2(MathOp::Div, image, max);
-  freeTensor(max_1D);
-  freeTensor(max);
-  return img_norm;
-}
-
-void *main_procedure(void *dataset, void *gaussian) {
-  forward_reshape(dataset);
-  auto *sharpened = tensorConvApprox(dataset, gaussian, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-  backward_reshape(sharpened);
-  return normalize(sharpened);
-}
-
-int main(int argc, char *argv[]) {
-  const char *input1_path = "../model_params/image_processing_5k_128_128";
-  llvm_hpvm_initTensorRt(0);
-  Tensor *gaussian = gaussianFilter(1, 3, 3, 1);
-  size_t bstart = 0;
-  startMemTracking();
-  startProfiling();
-  while (true) {
-    Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-    if (batch == nullptr) // If end of dataset
-      break;
-    auto *result = main_procedure(batch, gaussian);
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  stopProfiling();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc
deleted file mode 100644
index f55d883c2c11f1f0721b64d4ab00de373f685a3e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc
+++ /dev/null
@@ -1,186 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 10;
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
- 
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-  float final_accuracy = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-    profiler.resume_profiler();
-
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-
-    // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv1_tanh = tensorTanh(pool1out);
-
-    // NOTE: input channels have to match between tensor op inputs and outputs 
-    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv2_tanh = tensorTanh(pool2out);
-
-    void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorTanh(gemm1biasout);
-  
-    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    profiler.pause_profiler();
-    auto time_energy = profiler.get_time_energy();
-    total_time += time_energy.first;
-    total_energy += time_energy.second;
-
-    profiler.reset();
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    final_accuracy += computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc
deleted file mode 100644
index e84ca25f6fc3b44b02ce5b45f4517ba6ad6bc3be..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc
+++ /dev/null
@@ -1,438 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-  startProfiling();
-
-  int test_input_size = 5000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-      for(int i = 0; i < batch_count; i++){ 
-
-        int start = i * batch_size; 
-        int end = (i + 1) * batch_size; 
-
-        void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-        void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-        void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-        void* var_2 = tensorRelu(var_1); 
-        void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-        void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-        void* var_6 = tensorRelu(var_5); 
-        void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-        void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-        void* var_9 = tensorRelu(var_8); 
-        void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-        void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-        void* var_13 = tensorRelu(var_12); 
-        void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-        void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-        void* var_16 = tensorRelu(var_15); 
-        void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-        void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-        void* var_20 = tensorRelu(var_19); 
-        void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-        void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-        void* var_23 = tensorRelu(var_22); 
-        void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-        void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-        void* var_28 = tensorRelu(var_27); 
-        void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-        void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-        void* var_31 = tensorRelu(var_30); 
-        void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-        void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-        void* var_35 = tensorRelu(var_34); 
-        void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-        void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-        void* var_38 = tensorRelu(var_37); 
-        void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-        void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-        void* var_43 = tensorRelu(var_42); 
-        void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-        void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-        void* var_46 = tensorRelu(var_45); 
-        void* var_48 = tensorConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-        void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-        void* var_50 = tensorRelu(var_49); 
-        void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-        void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-        void* var_53 = tensorRelu(var_52); 
-        void* var_55 = tensorConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-        void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-        void* var_57 = tensorRelu(var_56); 
-        void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-        void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-        void* var_60 = tensorRelu(var_59); 
-        void* var_63 = tensorConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-        void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-        void* var_65 = tensorRelu(var_64); 
-        void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-        void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-        void* var_68 = tensorRelu(var_67); 
-        void* var_70 = tensorConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-        void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-        void* var_72 = tensorRelu(var_71); 
-        void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-        void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-        void* var_75 = tensorRelu(var_74); 
-        void* var_77 = tensorConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-        void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-        void* var_79 = tensorRelu(var_78); 
-        void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-        void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-        void* var_82 = tensorRelu(var_81); 
-        void* var_85 = tensorConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-        void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-        void* var_87 = tensorRelu(var_86); 
-        void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-        void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-        void* var_90 = tensorRelu(var_89); 
-        void* var_92 = tensorConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-        void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-        void* var_94 = tensorRelu(var_93); 
-        void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-        void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-        void* var_97 = tensorRelu(var_96); 
-        void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); 
-        void* var_101 = tensorGemmGPU(var_99, dense_1_w); 
-        void* var_102 = tensorAdd(var_101, dense_1_b); 
-        void* var_103 = tensorSoftmax(var_102); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-        float accuracy = computeAccuracy2(labels, batch_size, var_103); 
-        final_accuracy += accuracy; 
-        freeBatchMemory(); 
-      }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc
deleted file mode 100644
index 3dcce8ada9c74a439440594eb7df8a33c169cf6c..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc
+++ /dev/null
@@ -1,415 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 1000;  
-  int batch_count = test_input_size / batch_size; 
-
-  int total_runs = 10;
-  float final_accuracy = 0.0; 
-
-  for (int run_num = 0; run_num < total_runs; run_num++){
-      for(int i = 0; i < batch_count; i++){ 
-
-        int start = i * batch_size; 
-        int end = (i + 1) * batch_size; 
-
-        void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-        void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-        void* var_2 = tensorRelu(var_1); 
-        void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-        void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-        void* var_6 = tensorRelu(var_5); 
-        void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-        void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-        void* var_9 = tensorRelu(var_8); 
-        void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-        void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-        void* var_13 = tensorRelu(var_12); 
-        void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-        void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-        void* var_16 = tensorRelu(var_15); 
-        void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-        void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-        void* var_20 = tensorRelu(var_19); 
-        void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-        void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-        void* var_23 = tensorRelu(var_22); 
-        void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-        void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-        void* var_28 = tensorRelu(var_27); 
-        void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-        void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-        void* var_31 = tensorRelu(var_30); 
-        void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-        void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-        void* var_35 = tensorRelu(var_34); 
-        void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-        void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-        void* var_38 = tensorRelu(var_37); 
-        void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-        void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-        void* var_43 = tensorRelu(var_42); 
-        void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-        void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-        void* var_46 = tensorRelu(var_45); 
-        void* var_48 = tensorConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-        void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-        void* var_50 = tensorRelu(var_49); 
-        void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-        void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-        void* var_53 = tensorRelu(var_52); 
-        void* var_55 = tensorConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-        void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-        void* var_57 = tensorRelu(var_56); 
-        void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-        void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-        void* var_60 = tensorRelu(var_59); 
-        void* var_63 = tensorConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-        void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-        void* var_65 = tensorRelu(var_64); 
-        void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-        void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-        void* var_68 = tensorRelu(var_67); 
-        void* var_70 = tensorConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-        void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-        void* var_72 = tensorRelu(var_71); 
-        void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-        void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-        void* var_75 = tensorRelu(var_74); 
-        void* var_77 = tensorConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-        void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-        void* var_79 = tensorRelu(var_78); 
-        void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-        void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-        void* var_82 = tensorRelu(var_81); 
-        void* var_85 = tensorConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-        void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-        void* var_87 = tensorRelu(var_86); 
-        void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-        void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-        void* var_90 = tensorRelu(var_89); 
-        void* var_92 = tensorConvCutlass(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-        void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-        void* var_94 = tensorRelu(var_93); 
-        void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-        void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-        void* var_97 = tensorRelu(var_96); 
-        void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); 
-        void* var_101 = tensorGemmGPU(var_99, dense_1_w); 
-        void* var_102 = tensorAdd(var_101, dense_1_b); 
-        void* var_103 = tensorSoftmax(var_102); 
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-        float accuracy = computeAccuracy2(labels, batch_size, var_103); 
-        final_accuracy += accuracy; 
-        freeBatchMemory(); 
-      }
-  }
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc
deleted file mode 100644
index 5af17774b41d4d265e110dd988e3458442312226..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc
+++ /dev/null
@@ -1,247 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 10;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  
-  llvm_hpvm_initTensorRt(0); 
-
-  //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/");
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/");
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 500; 
-  int batch_count = test_input_size / batch_size; 
-
-
-  float final_accuracy = 0.0;
-
-  for(int j = 0; j < total_runs; j++){    
-    for(int i = 0; i < batch_count; i++){ 
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_6 = tensorRelu(var_5); 
-      void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-      void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_9 = tensorRelu(var_8); 
-      void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_13 = tensorRelu(var_12); 
-      void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-      void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_16 = tensorRelu(var_15); 
-      void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_28 = tensorRelu(var_27); 
-      void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-      void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_31 = tensorRelu(var_30); 
-      void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_43 = tensorRelu(var_42); 
-      void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-      void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_46 = tensorRelu(var_45); 
-      void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); 
-      void* var_49 = tensorGemmGPU(var_47, dense_1_w); 
-      void* var_50 = tensorAdd(var_49, dense_1_b); 
-      void* var_51 = tensorSoftmax(var_50); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    //final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy / batch_count); 
-  }
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy);
-
-  //dumpExecutionAccuracies();
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc
deleted file mode 100644
index c60f15e3cb71d9fa81b444be8348803e1d7891d6..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  for(int i = 0; i < total_runs; i++){
-	  for(int i = 0; i < batch_count; i++){ 
-
-		int start = i * batch_size; 
-		int end = (i + 1) * batch_size; 
-
-		void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-
-		void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-		void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-		void* var_2 = tensorRelu(var_1); 
-		void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-		void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-		void* var_6 = tensorRelu(var_5); 
-		void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-		void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-		void* var_9 = tensorRelu(var_8); 
-		void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-		void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-		void* var_13 = tensorRelu(var_12); 
-		void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-		void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-		void* var_16 = tensorRelu(var_15); 
-		void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); 
-		void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-		void* var_20 = tensorRelu(var_19); 
-		void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-		void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-		void* var_23 = tensorRelu(var_22); 
-		void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-		void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-		void* var_28 = tensorRelu(var_27); 
-		void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-		void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-		void* var_31 = tensorRelu(var_30); 
-		void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-		void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-		void* var_35 = tensorRelu(var_34); 
-		void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-		void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-		void* var_38 = tensorRelu(var_37); 
-		void* var_40 = tensorPooling(var_38,1,2,2,0,0,2,2); 
-		void* var_42 = tensorGemmGPU(var_40, dense_1_w); 
-		void* var_43 = tensorAdd(var_42, dense_1_b); 
-		void* var_44 = tensorSoftmax(var_43); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-		uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-		float accuracy = computeAccuracy2(labels, batch_size, var_44); 
-		final_accuracy += accuracy; 
-		freeBatchMemory(); 
-	  } 
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc
deleted file mode 100644
index 1b0664200170235e2d0dac5682108de97b094776..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc
+++ /dev/null
@@ -1,245 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-  
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 1000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  int total_runs = 10;
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-
-  for(int i = 0; i < total_runs; i++){
-      for(int i = 0; i < batch_count; i++){
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-        profiler.resume_profiler();
-        
-        void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-        void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-        void* var_4 = tensorRelu(var_3); 
-        void* var_6 = tensorConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-        void* var_7 = tensorAdd(var_6, conv2d_2_b); 
-        void* var_8 = tensorRelu(var_7); 
-        void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-        void* var_12 = tensorAdd(var_4, var_11); 
-        void* var_13 = tensorRelu(var_12); 
-        void* var_15 = tensorConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-        void* var_17 = tensorRelu(var_16); 
-        void* var_19 = tensorConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_20 = tensorAdd(var_19, conv2d_5_b); 
-        void* var_21 = tensorAdd(var_13, var_20); 
-        void* var_22 = tensorRelu(var_21); 
-        void* var_24 = tensorConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-        void* var_25 = tensorAdd(var_24, conv2d_6_b); 
-        void* var_26 = tensorRelu(var_25); 
-        void* var_28 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-        void* var_29 = tensorAdd(var_28, conv2d_7_b); 
-        void* var_30 = tensorAdd(var_22, var_29); 
-        void* var_31 = tensorRelu(var_30); 
-        void* var_33 = tensorConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); 
-        void* var_34 = tensorAdd(var_33, conv2d_8_b); 
-        void* var_35 = tensorRelu(var_34); 
-        void* var_37 = tensorConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-        void* var_38 = tensorAdd(var_37, conv2d_9_b); 
-        void* var_40 = tensorConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); 
-        void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-        void* var_42 = tensorAdd(var_41, var_38); 
-        void* var_43 = tensorRelu(var_42); 
-        void* var_45 = tensorConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-        void* var_46 = tensorAdd(var_45, conv2d_11_b); 
-        void* var_47 = tensorRelu(var_46); 
-        void* var_49 = tensorConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-        void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-        void* var_51 = tensorAdd(var_43, var_50); 
-        void* var_52 = tensorRelu(var_51); 
-        void* var_54 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-        void* var_55 = tensorAdd(var_54, conv2d_13_b); 
-        void* var_56 = tensorRelu(var_55); 
-        void* var_58 = tensorConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); 
-        void* var_59 = tensorAdd(var_58, conv2d_14_b); 
-        void* var_60 = tensorAdd(var_52, var_59); 
-        void* var_61 = tensorRelu(var_60); 
-        void* var_63 = tensorConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); 
-        void* var_64 = tensorAdd(var_63, conv2d_15_b); 
-        void* var_65 = tensorRelu(var_64); 
-        void* var_67 = tensorConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); 
-        void* var_68 = tensorAdd(var_67, conv2d_16_b); 
-        void* var_70 = tensorConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
-        void* var_71 = tensorAdd(var_70, conv2d_17_b); 
-        void* var_72 = tensorAdd(var_71, var_68); 
-        void* var_73 = tensorRelu(var_72); 
-        void* var_75 = tensorConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); 
-        void* var_76 = tensorAdd(var_75, conv2d_18_b); 
-        void* var_77 = tensorRelu(var_76); 
-        void* var_79 = tensorConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); 
-        void* var_80 = tensorAdd(var_79, conv2d_19_b); 
-        void* var_81 = tensorAdd(var_73, var_80); 
-        void* var_82 = tensorRelu(var_81); 
-        void* var_84 = tensorConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); 
-        void* var_85 = tensorAdd(var_84, conv2d_20_b); 
-        void* var_86 = tensorRelu(var_85); 
-        void* var_88 = tensorConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); 
-        void* var_89 = tensorAdd(var_88, conv2d_21_b); 
-        void* var_90 = tensorAdd(var_82, var_89); 
-        void* var_91 = tensorRelu(var_90); 
-        void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); 
-        void* var_94 = tensorGemmGPU(var_92, dense_1_w); 
-        void* var_95 = tensorAdd(var_94, dense_1_b); 
-        void* var_96 = tensorSoftmax(var_95); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-        final_accuracy += accuracy;
-        
-        freeBatchMemory();
-    }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc
deleted file mode 100644
index 70246dac4b3d43550f49a0a653d1c13396f3a84a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 500; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  int total_runs = 10;
-  Profiler profiler;
-  profiler.start_profiler();
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  for (int i = 0; i < total_runs; i++){
-	  for(int i = 0; i < batch_count; i++){ 
-
-		int start = i * batch_size; 
-		int end = (i + 1) * batch_size; 
-
-		void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-
-		void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-		void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-		void* var_2 = tensorRelu(var_1); 
-		void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-		void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-		void* var_6 = tensorRelu(var_5); 
-		void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-		void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-		void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-		void* var_10 = tensorRelu(var_9); 
-		void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-		void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-		void* var_14 = tensorRelu(var_13); 
-		void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-		void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-		void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-		void* var_18 = tensorRelu(var_17); 
-		void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-		void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-		void* var_22 = tensorRelu(var_21); 
-		void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-		void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-		void* var_26 = tensorRelu(var_25); 
-		void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-		void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-		void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-		void* var_30 = tensorRelu(var_29); 
-		void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-		void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-		void* var_34 = tensorRelu(var_33); 
-		void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-		void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-		void* var_38 = tensorRelu(var_37); 
-		void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-		void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-		void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-		void* var_42 = tensorRelu(var_41); 
-		void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-		void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-		void* var_46 = tensorRelu(var_45); 
-		void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-		void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-		void* var_50 = tensorRelu(var_49); 
-		void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-		void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-		void* var_55 = tensorAdd(var_54, dense_1_b); 
-		void* var_56 = tensorRelu(var_55); 
-		void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-		void* var_59 = tensorAdd(var_58, dense_2_b); 
-		void* var_60 = tensorSoftmax(var_59); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-		uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-		float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); 
-		final_accuracy += accuracy; 
-		freeBatchMemory(); 
-	 
-	  }
-  }
-
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / batch_count / total_runs; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc
deleted file mode 100644
index 7ed583884a3fa2fca745bde4d27f8ca92cfcda02..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h"
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 5000;
-  int batch_size = 500;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  Profiler profiler;
-  profiler.start_profiler();
-
-  double total_time = 0.0;
-  double total_energy = 0.0;
-
-  int total_runs = 10; 
-  for(int i = 0; i < total_runs; i++){
-      for(int i = 0; i < batch_count; i++){
-
-        int start = i * batch_size;
-        int end = (i + 1) * batch_size;
-        
-        void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); 
-
-        profiler.resume_profiler();
-     
-        void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-        void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-        void* var_2 = tensorRelu(var_1); 
-        void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-        void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-        void* var_6 = tensorRelu(var_5); 
-        void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-        void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-        void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-        void* var_10 = tensorRelu(var_9); 
-        void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-        void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-        void* var_14 = tensorRelu(var_13); 
-        void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-        void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-        void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-        void* var_18 = tensorRelu(var_17); 
-        void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-        void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-        void* var_22 = tensorRelu(var_21); 
-        void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-        void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-        void* var_26 = tensorRelu(var_25); 
-        void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-        void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-        void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-        void* var_30 = tensorRelu(var_29); 
-        void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-        void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-        void* var_34 = tensorRelu(var_33); 
-        void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-        void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-        void* var_38 = tensorRelu(var_37); 
-        void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-        void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-        void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-        void* var_42 = tensorRelu(var_41); 
-        void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-        void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-        void* var_46 = tensorRelu(var_45); 
-        void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-        void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-        void* var_50 = tensorRelu(var_49); 
-        void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-        void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-        void* var_55 = tensorAdd(var_54, dense_1_b); 
-        void* var_56 = tensorRelu(var_55); 
-        void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-        void* var_59 = tensorAdd(var_58, dense_2_b); 
-        void* var_60 = tensorSoftmax(var_59); 
-
-        profiler.pause_profiler();
-        auto time_energy = profiler.get_time_energy();
-        total_time += time_energy.first;
-        total_energy += time_energy.second;
-        profiler.reset();
-
-        uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-        float accuracy = computeAccuracy2(labels,batch_size,var_60); 
-        final_accuracy += accuracy;
-        
-        freeBatchMemory();
-    }
-  }
-  profiler.stop_profiler();
-
-  std::cout<<"---------------------------------------\n";
-  std::cout<<"Average time: " << total_time / total_runs << '\n';
-  std::cout<<"Average energy: " << total_energy / total_runs << '\n';
-  std::cout<<"---------------------------------------\n";
-
-  final_accuracy = final_accuracy / batch_count / total_runs;
-  dumpFinalAccuracy(final_accuracy);
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc
deleted file mode 100644
index fbc9d038505313adefdf9100a1e55e3a98d823f8..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc
+++ /dev/null
@@ -1,163 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/alexnet2_cifar10/test_labels.bin", test_batch_size);
-
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readTrainedWeights("../model_params/alexnet2_cifar10/norm_cifar_input.bin",
-			  	   float_type,
-				   test_batch_size, 3, 32, 32);
-    
-    void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					    float_type, 32, 3, 3, 3);  
-    void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					  float_type, 1, 32, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					    float_type, 32, 32, 3, 3);  
-    void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					  float_type, 1, 32, 1, 1);
-    void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					    float_type, 64, 32, 3, 3);  
-    void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					  float_type, 1, 64, 1, 1);  
-    void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					    float_type, 64, 64, 3, 3);  
-    void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					  float_type, 1, 64, 1, 1);
-    void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					    float_type, 128, 64, 3, 3);  
-    void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					  float_type, 1, 128, 1, 1);
-    void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					    float_type, 128, 128, 3, 3);  
-    void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					  float_type, 1, 128, 1, 1);
-  
-    void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					   float_type, 1, 1, 2048, 10);  
-    void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-					float_type, 1, 10, 1, 1);  
- 
-  
-    clearTensorMap();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performance profiling 
-    startProfiling();
-
-    //-1.881, 2.09
-    //-0.18,0.174
-    void* conv1_out = ConvLayer_PROMISE(input, -1.881, 2.09, conv1_filter, -0.542,0.371, conv1_bias, -0.066,0.04,
-					1, 1, 1, 1, 0, 0, 0, -1,1, 9);
-
-    void* conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -0.424,0.314, conv2_bias, -0.355,-0.172, 
-					1, 1, 1, 1, 0, 2, 0, -1,1, 9);
-    
-    void* conv3_out = ConvLayer_PROMISE(conv2_out, -1,1, conv3_filter, -0.441,0.795, conv3_bias, -0.804,0.753, 
-   				       1, 1, 1, 1, 0, 0, 0, -1,1, 9);
-
-    void* conv4_out = ConvLayer_PROMISE(conv3_out, -1,1, conv4_filter, -0.288,0.31, conv4_bias, -0.635,0.29, 
-				        1, 1, 1, 1, 0, 2, 0, -1,1, 9);
-
-    void* conv5_out = ConvLayer_PROMISE(conv4_out, -1,1, conv5_filter, -0.279,0.376, conv5_bias, -1.13, 1.239,
-					1, 1, 1, 1, 0, 0, 0, -1,1, 9);
-
-    void* conv6_out = ConvLayer_PROMISE(conv5_out, -1,1, conv6_filter, -0.27,0.279, conv6_bias, -0.503,0.127,
-					1, 1, 1, 1, 0, 2, 0, -1,1, 9);
-
-    // No Activation
-    void* fc1_out = FCLayer_PROMISE(conv6_out, -1,1, fc1_weights, -0.242,0.584, fc1_bias, -0.537,0.558, -1, -1,1, 9);
-    
-    void* result = tensorSoftmax(fc1_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc
deleted file mode 100644
index 3c3bc018518cf6ab3ff7dd7a608900308efa1e49..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-  int total_runs = 1; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 5000; 
-    int batch_size = 5000;
-    int offset = 5000;
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){
-      
-      std::string dir_prefix = std::string("../model_params/alexnet2_cifar10_test/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-      void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-      void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); 
-      std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-      void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-      void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); 
-      std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-      void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); 
-      std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-      void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-      int start = i * batch_size + offset; 
-      int end = (i + 1) * batch_size + offset; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, -0.7750273948907852, 0.7799443006515503, 9); 
-      void* var_1 = ConvLayer_PROMISE(var_0, -0.7750273948907852, 0.7799443006515503, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.8086670643091202, 0.98395329773426, 9); 
-      void* var_2 = ConvLayer_PROMISE(var_1, -0.8086670643091202, 0.98395329773426, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9956784248352051, 0.9985664486885071, 9); 
-      void* var_3 = ConvLayer_PROMISE(var_2, -0.9956784248352051, 0.9985664486885071, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9932191967964172, 0.9923790097236633, 9); 
-      void* var_4 = ConvLayer_PROMISE(var_3, -0.9932191967964172, 0.9923790097236633, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -0.9999013543128967, 0.9999108910560608, 9); 
-      void* var_5 = ConvLayer_PROMISE(var_4, -0.9999013543128967, 0.9999108910560608, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.991036117374897, 0.9714049702882765, 9); 
-      void* var_6 = FCLayer_PROMISE(var_5, -0.991036117374897, 0.9714049702882765, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -119.27973731994629, -25.226281957626327, 9); 
-      void* var_7 = tensorSoftmax(var_6); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_7); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc
deleted file mode 100644
index 1799480796fb988d0e9624fb482339d2345d2728..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  int total_runs = 1; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 5000; 
-    int batch_size = 5000; 
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){
-      
-      std::string dir_prefix = std::string("../model_params/alexnet2_cifar10_test/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-      void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-      void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); 
-      std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-      void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-      void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); 
-      std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-      void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); 
-      std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-      void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, -0.7750273948907852, 0.7799443006515503, 9); 
-      void* var_1 = ConvLayer_PROMISE(var_0, -0.7750273948907852, 0.7799443006515503, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.8086670643091202, 0.98395329773426, 9); 
-      void* var_2 = ConvLayer_PROMISE(var_1, -0.8086670643091202, 0.98395329773426, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9956784248352051, 0.9985664486885071, 9); 
-      void* var_3 = ConvLayer_PROMISE(var_2, -0.9956784248352051, 0.9985664486885071, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9932191967964172, 0.9923790097236633, 9); 
-      void* var_4 = ConvLayer_PROMISE(var_3, -0.9932191967964172, 0.9923790097236633, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -0.9999013543128967, 0.9999108910560608, 9); 
-      void* var_5 = ConvLayer_PROMISE(var_4, -0.9999013543128967, 0.9999108910560608, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.991036117374897, 0.9714049702882765, 9); 
-      void* var_6 = FCLayer_PROMISE(var_5, -0.991036117374897, 0.9714049702882765, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -119.27973731994629, -25.226281957626327, 9); 
-      void* var_7 = tensorSoftmax(var_6); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_7); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc
deleted file mode 100644
index 3e39f5cf03ce25511429d84ada9812fef0998194..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 100000;
-  }
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
- 
-  for(int i = 0; i < total_runs; i++){
-
-    void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin",
-				     float_type,
-				     test_batch_size, 3, 32, 32);
-
-    void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin",
-					    float_type, 64, 3, 11, 11);  
-    void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin",
-					  float_type, 1, 64, 1, 1);  
-    void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin",
-					    float_type, 192, 64, 5, 5);  
-    void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin",
-					  float_type, 1, 192, 1, 1);
-
-    void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin",
-					    float_type, 384, 192, 3, 3);  
-    void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin",
-					  float_type, 1, 384, 1, 1);  
-    void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin",
-					    float_type, 256, 384, 3, 3);  
-    void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin",
-					  float_type, 1, 256, 1, 1);
-    void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin",
-					    float_type, 256, 256, 3, 3);  
-    void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin",
-					  float_type, 1, 256, 1, 1);
-  
-    void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin",
-					   float_type, 1, 1, 4096, 10);  
-    void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin",
-					float_type, 1, 10, 1, 1);  
-    
-  
-    clearTensorMap();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performance profiling 
-    startProfiling();
-
-    //-1.881, 2.09
-    //-0.18,0.174
-    void* conv1_out = ConvLayer_PROMISE(input, -1.881, 2.09, conv1_filter, -0.345,0.331, conv1_bias, -0.76,0.59,
-					5, 5, 1, 1, 0, 2, 0, -1,1, 9);
-
-    void* conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -0.22,0.264, conv2_bias, -0.448,0.343, 
-					2, 2, 1, 1, 0, 2, 0, -1,1, 9);
-    
-    void* conv3_out = ConvLayer_PROMISE(conv2_out, -1,1, conv3_filter, -0.192,0.187, conv3_bias, -0.91,0.67, 
-   				       1, 1, 1, 1, 0, 0, 0, -1,1, 9);
-
-    void* conv4_out = ConvLayer_PROMISE(conv3_out, -1,1, conv4_filter, -0.131,0.141, conv4_bias, -0.416,0.342, 
-				        1, 1, 1, 1, 0, 0, 0, -1,1, 9);
-
-    void* conv5_out = ConvLayer_PROMISE(conv4_out, -1,1, conv5_filter, -0.165,0.188, conv5_bias, -0.283,0.062,
-					1, 1, 1, 1, 0, 2, 0, -1,1, 9);
-
-    // No Activation
-    void* fc1_out = FCLayer_PROMISE(conv5_out, -1,1, fc1_weights, -0.181,0.233, fc1_bias, -0.063,0.137, -1, -1,1, 9);
-    
-    void* result = tensorSoftmax(fc1_out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc
deleted file mode 100644
index 6b2b0e80ba92fa449cdd06036946101df76317e7..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-  int total_runs = 1; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 5000; 
-    int batch_size = 5000;
-    int offset = 5000;
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){ 
-
-      std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-      void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-      std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-      void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-      std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-      void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-      std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-      void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-      std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-      void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-      int start = i * batch_size + offset; 
-      int end = (i + 1) * batch_size + offset; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); 
-      void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); 
-      void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); 
-      void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 0.9999993443489075, 9); 
-      void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); 
-      void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); 
-      void* var_6 = tensorSoftmax(var_5); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_6); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  //dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc
deleted file mode 100644
index 7a0a40adb30367866635993de3de94ca1413938e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  int total_runs = 20; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 5000; 
-    int batch_size = 5000;
-    int offset = 0;
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){ 
-
-      std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-      void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-      std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-      void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-      std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-      void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-      std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-      void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-      std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-      void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-      int start = i * batch_size + offset; 
-      int end = (i + 1) * batch_size + offset; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); 
-      void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); 
-      void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); 
-      void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 0.9999993443489075, 9); 
-      void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); 
-      void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); 
-      void* var_6 = tensorSoftmax(var_5); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_6); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp
deleted file mode 100644
index 377082d74e85d8394e097d9281003d482131bae5..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <algorithm>
-#include <cassert>
-#include <fstream>
-#include <string>
-
-const size_t n_channels = 3;
-
-Tensor *gaussianFilter_(float div) {
-  std::vector<float> gauss_data = {1,  4, 6,  4,  1,  4, 16, 24, 16,
-                                   4,  6, 24, 36, 24, 6, 4,  16, 24,
-                                   16, 4, 1,  4,  6,  4, 1};
-  for (float &f : gauss_data)
-    f /= div;
-  return (Tensor *)createFilterFromData(
-      CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1);
-}
-
-Tensor *gaussianFilter() { return gaussianFilter_(16.0); }
-
-void *normalize(void *image) {
-  auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max);
-  auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max);
-  return img_norm;
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-void *sharpen(void *image) {
-  void *gaussian = gaussianFilter();
-  forward_reshape(image);
-  void *blurred = ConvLayer_PROMISE(
-      image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  backward_reshape(blurred);
-  backward_reshape(image);
-  void *blurred_norm = normalize(blurred);
-  void *image_norm = normalize(image);
-  void *ret =
-      autotuner_tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm);
-  return ret;
-}
-
-void *main_procedure(void *fg, void *bg) {
-  void *g_bg = sharpen(bg);
-  void *g_fg = sharpen(fg);
-  void *ret = autotuner_tensorMap2(MathOp::Blend2, g_bg, g_fg);
-  return ret;
-}
-
-const size_t batch_size = 500, total_max = 3000;
-const float psnr_threshold = 25.0;
-
-int main() {
-  const char *input1_path = "../model_params/image_processing_5k";
-  const char *input2_path = "../model_params/image_processing_5k_shuffled";
-  const char *ref_output_path = "../model_params/blending_ref_output";
-  std::vector<float> psnr;
-
-  llvm_hpvm_initTensorRt(0);
-  startMemTracking();
-  size_t bstart = 0;
-  while (true) {
-    auto *background = readDataSet(input1_path, bstart, batch_size, n_channels),
-         *foreground = readDataSet(input2_path, bstart, batch_size, n_channels);
-    if (!background || !foreground)
-      break;
-
-    auto *result = main_procedure(foreground, background);
-
-    auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, n_channels);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp
deleted file mode 100644
index 5988eb11fd63b34e045445f9fab98c4e934e646d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <algorithm>
-#include <cassert>
-#include <fstream>
-#include <string>
-
-const size_t n_channels = 3;
-
-Tensor *gaussianFilter_(float div) {
-  std::vector<float> gauss_data = {1,  4, 6,  4,  1,  4, 16, 24, 16,
-                                   4,  6, 24, 36, 24, 6, 4,  16, 24,
-                                   16, 4, 1,  4,  6,  4, 1};
-  for (float &f : gauss_data)
-    f /= div;
-  return (Tensor *)createFilterFromData(
-      CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1);
-}
-
-Tensor *gaussianFilter() { return gaussianFilter_(16.0); }
-
-void *normalize(void *image) {
-  auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max);
-  auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max);
-  return img_norm;
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-void *sharpen(void *image) {
-  void *gaussian = gaussianFilter();
-  forward_reshape(image);
-  void *blurred = ConvLayer_PROMISE(
-      image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  backward_reshape(blurred);
-  backward_reshape(image);
-  void *blurred_norm = normalize(blurred);
-  void *image_norm = normalize(image);
-  void *ret =
-      autotuner_tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm);
-  return ret;
-}
-
-void *main_procedure(void *fg, void *bg) {
-  void *g_bg = sharpen(bg);
-  void *g_fg = sharpen(fg);
-  void *ret = autotuner_tensorMap2(MathOp::Blend2, g_bg, g_fg);
-  return ret;
-}
-
-const size_t batch_size = 500, total_max = 3000;
-const float psnr_threshold = 30.0;
-
-int main() {
-  const char *input1_path = "../model_params/image_processing_5k";
-  const char *input2_path = "../model_params/image_processing_5k_shuffled";
-  const char *ref_output_path = "../model_params/blending_ref_output";
-  std::vector<float> psnr;
-
-  llvm_hpvm_initTensorRt(1);
-  startMemTracking();
-  size_t bstart = 0;
-  while (true) {
-    auto *background = readDataSet(input1_path, bstart, batch_size, n_channels),
-         *foreground = readDataSet(input2_path, bstart, batch_size, n_channels);
-    if (!background || !foreground)
-      break;
-
-    auto *result = main_procedure(foreground, background);
-
-    auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, n_channels);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp
deleted file mode 100644
index 4024e542f5338df556b7ea51a171fb0ce04afce8..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-#include <fstream>
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-std::pair<Tensor *, Tensor *> getSobelKernels() {
-  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-  auto *t1 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-  auto *t2 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-  return std::make_pair(t1, t2);
-}
-
-void *main_procedure(void *dataset) {
-  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
-  Tensor *kernel_x, *kernel_y;
-  std::tie(kernel_x, kernel_y) = getSobelKernels();
-
-  // 0. Grayscale
-  auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add);
-  auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image);
-  // 1. Denoise
-  auto *image2 = ConvLayer_PROMISE(
-      grayscale_image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1,
-      1, 0, 0, -1, 0.0, 0.0, 0);
-  // 2. Get edge gradient / direction
-  auto *grad_x = ConvLayer_PROMISE(
-      image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  auto *grad_y = ConvLayer_PROMISE(
-      image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y);
-  // 2.5. Normalize grad magnitude
-  auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max);
-  auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max);
-  auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max);
-  return grad_mag_norm;
-}
-
-const size_t batch_size = 500, total_max = 3000;
-const float psnr_threshold = 25.0;
-
-int main() {
-  const char *input_path = "../model_params/image_processing_5k";
-  const char *ref_output_path = "../model_params/canny_ref_output";
-  std::vector<float> psnr;
-  llvm_hpvm_initTensorRt(0);
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input_path, bstart, batch_size);
-    if (batch == nullptr)
-      break;
-
-    auto *result = main_procedure(batch);
-    auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp
deleted file mode 100644
index b3737ff204cf2cb1eb9ea7849f6b745cf4139dec..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-#include <fstream>
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-std::pair<Tensor *, Tensor *> getSobelKernels() {
-  std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-  std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-  auto *t1 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-  auto *t2 =
-      (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-  return std::make_pair(t1, t2);
-}
-
-void *main_procedure(void *dataset) {
-  Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
-  Tensor *kernel_x, *kernel_y;
-  std::tie(kernel_x, kernel_y) = getSobelKernels();
-
-  // 0. Grayscale
-  auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add);
-  auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image);
-  // 1. Denoise
-  auto *image2 = ConvLayer_PROMISE(
-      grayscale_image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1,
-      1, 0, 0, -1, 0.0, 0.0, 0);
-  // 2. Get edge gradient / direction
-  auto *grad_x = ConvLayer_PROMISE(
-      image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  auto *grad_y = ConvLayer_PROMISE(
-      image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0,
-      -1, 0.0, 0.0, 0);
-  auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y);
-  // 2.5. Normalize grad magnitude
-  auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max);
-  auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max);
-  auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max);
-  return grad_mag_norm;
-}
-
-const size_t batch_size = 500, total_max = 3000;
-const float psnr_threshold = 30.0;
-
-int main() {
-  const char *input_path = "../model_params/image_processing_5k";
-  const char *ref_output_path = "../model_params/canny_ref_output";
-  std::vector<float> psnr;
-  llvm_hpvm_initTensorRt(1);
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input_path, bstart, batch_size);
-    if (batch == nullptr)
-      break;
-
-    auto *result = main_procedure(batch);
-    auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp
deleted file mode 100644
index 7c4cc55f02b73dee6214ae0ebd14e921334b1f18..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <cmath>
-#include <iostream>
-#include <thrust/complex.h>
-#include <vector>
-
-const size_t batch_size = 250, total_max = 3000;
-const size_t n_colors = N_RGB_CHAN;
-const float psnr_threshold = 25.0;
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-static void *normalize(void *image) {
-  auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max);
-  auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max);
-  freeTensor(max_1D);
-  freeTensor(max);
-  return img_norm;
-}
-
-void *main_procedure(void *dataset, void *gaussian) {
-  forward_reshape(dataset);
-  auto *sharpened = ConvLayer_PROMISE(
-      dataset, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0,
-      0, -1, 0.0, 0.0, 0);
-  backward_reshape(sharpened);
-  return normalize(sharpened);
-}
-
-int main(int argc, char *argv[]) {
-  const char *input1_path = "../model_params/image_processing_5k_128_128";
-  // const char *output_path = "output/";
-  const char *ref_path = "../model_params/fft_ref_output";
-
-  std::vector<float> psnr;
-  llvm_hpvm_initTensorRt(0);
-  Tensor *gaussian = gaussianFilter(1, 3, 3, 1);
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-    if (batch == nullptr) // If end of dataset
-      break;
-
-    auto *result = main_procedure(batch, gaussian);
-
-    // saveDataSet(output_path, (Tensor *)result, bstart);
-    auto *ref_output = readDataSet(ref_path, bstart, batch_size, n_colors);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp
deleted file mode 100644
index 3a7b413faa1795b1d14435875dff62ae08cb974b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include <cmath>
-#include <iostream>
-#include <thrust/complex.h>
-#include <vector>
-
-const size_t batch_size = 250, total_max = 3000;
-const size_t n_colors = N_RGB_CHAN;
-const float psnr_threshold = 30.0;
-
-Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
-  int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-  auto *data = new float[w * h];
-  float sum = 0.0f;
-  for (int64_t i = -m; i <= m; i++)
-    for (int64_t j = -n; j <= n; j++) {
-      size_t idx = (i + m) * h + (j + n);
-      float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-      data[idx] = exp(exponent);
-      sum += data[idx];
-    }
-  if (sum != 0.0f)
-    for (size_t i = 0; i < w * h; i++)
-      data[i] /= sum;
-  return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-}
-
-extern std::vector<size_t> sizes(Tensor *t);
-
-void forward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[1] == 3);
-  sz[0] = sz[0] * sz[1];
-  sz[1] = 1;
-  reshape(tensor, sz);
-}
-
-void backward_reshape(void *t) {
-  auto *tensor = (Tensor *)t;
-  std::vector<size_t> sz = sizes(tensor);
-  assert(sz[0] % 3 == 0);
-  sz[0] = sz[0] / 3;
-  sz[1] = 3;
-  reshape(tensor, sz);
-}
-
-static void *normalize(void *image) {
-  auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max);
-  auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max);
-  auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max);
-  freeTensor(max_1D);
-  freeTensor(max);
-  return img_norm;
-}
-
-void *main_procedure(void *dataset, void *gaussian) {
-  forward_reshape(dataset);
-  auto *sharpened = ConvLayer_PROMISE(
-      dataset, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0,
-      0, -1, 0.0, 0.0, 0);
-  backward_reshape(sharpened);
-  return normalize(sharpened);
-}
-
-int main(int argc, char *argv[]) {
-  const char *input1_path = "../model_params/image_processing_5k_128_128";
-  // const char *output_path = "output/";
-  const char *ref_path = "../model_params/fft_ref_output";
-
-  std::vector<float> psnr;
-  llvm_hpvm_initTensorRt(1);
-  Tensor *gaussian = gaussianFilter(1, 3, 3, 1);
-  size_t bstart = 0;
-  startMemTracking();
-  while (true) {
-    Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-    if (batch == nullptr) // If end of dataset
-      break;
-
-    auto *result = main_procedure(batch, gaussian);
-
-    // saveDataSet(output_path, (Tensor *)result, bstart);
-    auto *ref_output = readDataSet(ref_path, bstart, batch_size, n_colors);
-    std::vector<float> psnr_batch = PSNR(ref_output, result);
-    std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr));
-
-    bstart += batch_size;
-    if (bstart >= total_max)
-      break;
-    freeBatchMemory();
-  }
-  float violation = violationRate(psnr, psnr_threshold);
-  float mean_psnr = mean(psnr);
-  std::ofstream of("final_accuracy");
-  of << violation * 100 << ", " << mean_psnr << '\n';
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc
deleted file mode 100644
index 5c7699026fe6e0860718e5986f4fec990ab08c6c..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/lenet_relu/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  void* var_0 = ConvLayer_PROMISE(input, 0.0, 1.0, conv2d_1_w, -0.2722561, 0.25817025,
-				  conv2d_1_b, -0.041063767, 0.031912163,
-				  2, 2, 1, 1, 0, 2, 1, 0.0, 1.5512946, 9); 
-  void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.5512946, conv2d_2_w, -0.17580177, 0.16332611,
-				  conv2d_2_b, -0.041385915, 0.05869476,
-				  2, 2, 1, 1, -1, 0, 1, 0.0, 4.916329, 9); 
-  void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.916329, conv2d_3_w, -0.20324017, 0.18275258,
-				  conv2d_3_b, -0.039915435, 0.04589232,
-				  1, 1, 2, 2, -1, 0, 1, 0.0, 9.447418, 9); 
-  void* var_3 = FCLayer_PROMISE(var_2, 0.0, 9.447418, dense_1_w, -0.10757191, 0.123126,
-				dense_1_b, -0.025070198, 0.027000334, 1, 0.0, 9.926857, 9); 
-  void* var_4 = FCLayer_PROMISE(var_3, 0.0, 9.926857, dense_2_w, -0.18867673, 0.16425411,
-				dense_2_b, -0.012622595, 0.04586973, 1, 0.0, 42.018578, 9); 
-  void* var_5 = tensorSoftmax(var_4); 
-
-  computeAccuracy2(labels,10000,var_5); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc
deleted file mode 100644
index 3cb28def9b48bf29f3cffd5611991b0fbaeb4c55..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc
+++ /dev/null
@@ -1,419 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-llvm_hpvm_initTensorRt(1); 
-
-int total_runs = 1; 
-for (int i = 0 ; i < total_runs; i++){ 
-
-
-startMemTracking(); 
-
-int test_input_size = 10000; 
-int batch_size = 2000; 
-int batch_count = test_input_size / batch_size; 
-float final_accuracy = 0.0; 
-
-for(int i = 0; i < batch_count; i++){ 
-
-
-std::string dir_prefix = std::string("../../keras/data/mobilenet_quant/"); 
-std::string input_path =  dir_prefix + std::string("input.bin"); 
-std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-int start = i * batch_size; 
-int end = (i + 1) * batch_size; 
-
-void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); 
-void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-void* var_2 = tensorRelu(var_1); 
-void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-void* var_5 = tensorRelu(var_4); 
-void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); 
-void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-void* var_8 = tensorRelu(var_7); 
-void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-void* var_11 = tensorRelu(var_10); 
-void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); 
-void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-void* var_14 = tensorRelu(var_13); 
-void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-void* var_17 = tensorRelu(var_16); 
-void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); 
-void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-void* var_20 = tensorRelu(var_19); 
-void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-void* var_23 = tensorRelu(var_22); 
-void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); 
-void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-void* var_26 = tensorRelu(var_25); 
-void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-void* var_29 = tensorRelu(var_28); 
-void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); 
-void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-void* var_32 = tensorRelu(var_31); 
-void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-void* var_35 = tensorRelu(var_34); 
-void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); 
-void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-void* var_38 = tensorRelu(var_37); 
-void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-void* var_41 = tensorRelu(var_40); 
-void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); 
-void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-void* var_44 = tensorRelu(var_43); 
-void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-void* var_47 = tensorRelu(var_46); 
-void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); 
-void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-void* var_50 = tensorRelu(var_49); 
-void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-void* var_53 = tensorRelu(var_52); 
-void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); 
-void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-void* var_56 = tensorRelu(var_55); 
-void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-void* var_59 = tensorRelu(var_58); 
-void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); 
-void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-void* var_62 = tensorRelu(var_61); 
-void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-void* var_65 = tensorRelu(var_64); 
-void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); 
-void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-void* var_68 = tensorRelu(var_67); 
-void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-void* var_71 = tensorRelu(var_70); 
-void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); 
-void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-void* var_74 = tensorRelu(var_73); 
-void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-void* var_77 = tensorRelu(var_76); 
-void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); 
-void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-void* var_80 = tensorRelu(var_79); 
-void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2); 
-void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); 
-void* var_83 = tensorSoftmax(var_82); 
-
-uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-float accuracy = computeAccuracy2(labels, batch_size, var_83); 
-final_accuracy += accuracy; 
-freeBatchMemory(); 
- 
-}
-
-final_accuracy = final_accuracy / batch_count; 
-dumpFinalAccuracy(final_accuracy); 
-
-
-}
-
-dumpExecutionAccuracies(); 
-
-llvm_hpvm_cleanupTensorRt(); 
-
-return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc
deleted file mode 100644
index c3f11e1f2ff7f0a255d40ecd5916fbdada2b0be3..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc
+++ /dev/null
@@ -1,210 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-  int total_runs = 1;
- 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 10000; 
-    int batch_size = 2000; 
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){ 
-
-      std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-      std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-      void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-      void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-      void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-      void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-      std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-      void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-      std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-      void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-      void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-      void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-      void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-      std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-      void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-      void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-      void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-      void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-      std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-      void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-      std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-      void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-      void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-      void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-      void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); 
-      std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-      void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-      void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-      void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-      void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); 
-      std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-      void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); 
-      std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-      void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-      void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-      void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-      void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); 
-      std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-      void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-      void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-      void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-      void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-      std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-      void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-      std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-      void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-      void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-      void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-      void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-      std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-      void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-      void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-      void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-      void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-      std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-      void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-      std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-      void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-      void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-      void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-      void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-      std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-      void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-      void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-      void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-      void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-      int start = i * batch_size; 
-      int end = (i + 1) * batch_size; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.340709443449974, 1.3555025291442875, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -7.2273098745346065, 8.197232282638556, 9); 
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_5 = tensorRelu(var_4); 
-      void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.053754347324407, conv2d_2_w, -1.1412922372817993, 0.9433415410518639, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.87497807598114, 5.3558874282836655, 9); 
-      void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_8 = tensorRelu(var_7); 
-      void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_11 = tensorRelu(var_10); 
-      void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.144686742782682, conv2d_3_w, -0.6453772538900375, 0.6694499132037164, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.349411018371582, 4.33332164001466, 9); 
-      void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_14 = tensorRelu(var_13); 
-      void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); 
-      void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_17 = tensorRelu(var_16); 
-      void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.248231422424324, conv2d_4_w, -0.44596208560466766, 0.49276923143864204, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3628717079162596, 3.058014160633088, 9); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.569213481903169, conv2d_5_w, -0.3239764194488525, 0.2983359285593033, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.473401127815246, 4.425663429260224, 9); 
-      void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_26 = tensorRelu(var_25); 
-      void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_29 = tensorRelu(var_28); 
-      void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.728998669862753, conv2d_6_w, -0.2290773878991604, 0.22830345794558554, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.1163714165687564, 2.065946404457088, 9); 
-      void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_32 = tensorRelu(var_31); 
-      void* var_33 = tensorPooling(var_32,1,2,2,0,0,2,2); 
-      void* var_34 = FCLayer_PROMISE(var_33, 0.0, 2.191649281263443, dense_1_w, -0.3627079802751541, 0.3849146918058397, dense_1_b, -0.37858343, 0.377391, -1, -11.424064125061035, 18.695249080657973, 9); 
-      void* var_35 = tensorSoftmax(var_34); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_35); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-
-
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc
deleted file mode 100644
index ba2a14d990a7b7d3d1cc6ad4bc818b2c199a0c6b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ // Evaluates the mobilenet_shallow network on input samples [4000, 8000) in two batches, repeats that total_runs times, and records the mean batch accuracy of each run.
-
-  llvm_hpvm_initTensorRt(0); // Starts the tensor runtime; the argument is presumably a device id -- TODO confirm.
-
-  int total_runs = 40; // Number of times the whole evaluation below is repeated.
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); // Presumably marks the point from which freeBatchMemory() reclaims tensors -- TODO confirm.
-
-    int test_input_size = 4000; 
-    int batch_size = 2000; 
-    int batch_count = test_input_size / batch_size; // 4000 / 2000 = 2 batches per run.
-    float final_accuracy = 0.0; // Sum of per-batch accuracies; divided by batch_count after the batch loop.
-
-    for(int i = 0; i < batch_count; i++){ // NOTE: this 'i' shadows the run counter 'i' of the enclosing loop.
-
-      std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/"); // Relative path, so it resolves against the process working directory. Every weight file below is re-read on each batch iteration.
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-      std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-      void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-      void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-      void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-      void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-      std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-      void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-      std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-      void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-      void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-      void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-      void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-      std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-      void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-      void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-      void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-      void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-      std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-      void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-      std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-      void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-      void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-      void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-      void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-      std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-      void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-      void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-      void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-      void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-      std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-      void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-      std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-      void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-      void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-      void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-      void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-      std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-      void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-      void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-      void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-      void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-      std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-      void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-      std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-      void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-      void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-      void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-      void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-      std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-      void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-      void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-      void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-      void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-      std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-      void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-      std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-      void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-      void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-      void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-      void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-      std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-      void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-      void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-      void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-      void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-      std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-      void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-      std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-      void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-      void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-      void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-      void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-      void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-      std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-      void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-      void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-      void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-      void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-      int start = (i * batch_size) + 4000; // Fixed offset of 4000: the two batches cover samples [4000, 6000) and [6000, 8000).
-      int end = ((i + 1) * batch_size) + 4000; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); // Trailing 3,32,32 are presumably channels, height, width -- TODO confirm.
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.5164621164798737, 1.6472081774473288, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -9.868980642318725, 10.560956018447879, 9); // NOTE(review): the float pairs look like per-tensor min/max ranges and the final 9 like a swing level -- confirm against tensor_runtime.h.
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_5 = tensorRelu(var_4); 
-      void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.821381127357554, conv2d_2_w, -1.1834390873908995, 1.2731596627235617, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -9.875998497009277, 7.51305247974393, 9); 
-      void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_8 = tensorRelu(var_7); 
-      void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_11 = tensorRelu(var_10); 
-      void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.826067455768602, conv2d_3_w, -0.599876856982708, 0.6812073457241064, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.633289833068848, 5.177892235755925, 9); 
-      void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_14 = tensorRelu(var_13); 
-      void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_17 = tensorRelu(var_16); 
-      void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.02646304416659, conv2d_4_w, -0.4555967862010002, 0.4942613914608956, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.316803941726685, 4.605850250244146, 9); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 4.532649063110355, conv2d_5_w, -0.35657615590095515, 0.3382165088057521, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.1012511816024775, 4.3630500688553, 9); 
-      void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_26 = tensorRelu(var_25); 
-      void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_29 = tensorRelu(var_28); 
-      void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.9874704387188977, conv2d_6_w, -0.28502783328294756, 0.28604640334844594, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.243851703643799, 3.486250406742097, 9); 
-      void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_32 = tensorRelu(var_31); 
-      void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 6.563065901756522, conv2d_7_w, -0.18946402323246003, 0.19012390717864017, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.938115713119507, 3.538363476753238, 9); 
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_39 = tensorPooling(var_38,1,2,2,0,0,2,2); 
-      void* var_40 = FCLayer_PROMISE(var_39, 0.0, 1.8908388000727185, dense_1_w, -0.35140394401550296, 0.422872786462307, dense_1_b, -0.23878151, 0.26507422, -1, -14.630816223144532, 27.27252123260504, 9); 
-      void* var_41 = tensorSoftmax(var_40); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); // Labels are read for the same [start, end) range as the input batch.
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_41); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); // Presumably releases the tensors allocated during this batch -- TODO confirm.
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; // Mean of the per-batch accuracies for this run.
-    dumpFinalAccuracy(final_accuracy); 
-
-
-  }
-
-  dumpExecutionAccuracies(); // Called once after all runs; presumably writes out the per-run accuracies recorded above -- TODO confirm.
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc
deleted file mode 100644
index 8cf22349346230889f9a4f1385b94ceabe04612e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc
+++ /dev/null
@@ -1,418 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-  int total_runs = 40; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 4000; 
-    int batch_size = 2000; 
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){ 
-
-
-      std::string dir_prefix = std::string("../../keras/data/mobilenet_quant/"); 
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-      std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-      void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-      void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-      void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-      void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-      std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-      void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-      std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-      void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-      void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-      void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-      std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-      void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-      std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-      void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-      void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-      void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-      void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-      std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-      void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-      std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-      void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-      void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-      void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-      std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-      void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-      std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-      void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-      void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-      void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-      void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-      std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-      void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-      std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-      void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-      void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-      void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-      void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-      std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-      void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-      void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-      void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-      void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-      std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-      void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-      std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-      void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-      void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-      void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-      std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-      void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-      std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-      void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-      void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-      void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-      void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-      std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-      void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-      std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-      void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-      void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-      void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-      void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-      std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-      void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-      void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-      void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-      void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-      std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-      void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-      std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-      void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-      void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-      void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-      std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-      void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-      std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-      void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-      std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-      void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-      void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-      void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-      void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-      void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-      void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-      void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-      void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-      void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-      void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-      std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-      void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-      void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-      void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-      void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-      void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-      void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-      void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-      void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-      void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-      void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-      std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-      void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-      void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-      void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-      void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-      void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-      void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-      void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-      void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-      void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-      void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-      std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-      void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-      void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-      void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-      void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-      void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-      void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-      void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-      void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-      void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-      void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-      std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-      void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-      void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-      void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-      void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-      void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-      void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-      void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-      void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-      void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-      void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-      std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-      void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-      void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-      void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-      void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-      std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-      void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-      std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-      void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-      void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-      void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-      std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-      void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-      std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-      void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-      std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-      void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-      void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-      void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-      void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-      std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-      void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-      std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-      void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-      void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-      void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-      void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-      std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-      void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-      std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-      void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-      void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-      void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-      std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-      void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-      int start = (i * batch_size) + 4000; 
-      int end = ((i + 1) * batch_size) + 4000; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); 
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-      void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-      void* var_5 = tensorRelu(var_4); 
-      void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); 
-      void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-      void* var_8 = tensorRelu(var_7); 
-      void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-      void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-      void* var_11 = tensorRelu(var_10); 
-      void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); 
-      void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-      void* var_14 = tensorRelu(var_13); 
-      void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-      void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-      void* var_17 = tensorRelu(var_16); 
-      void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); 
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-      void* var_20 = tensorRelu(var_19); 
-      void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-      void* var_23 = tensorRelu(var_22); 
-      void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); 
-      void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-      void* var_26 = tensorRelu(var_25); 
-      void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-      void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-      void* var_29 = tensorRelu(var_28); 
-      void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); 
-      void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-      void* var_32 = tensorRelu(var_31); 
-      void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); 
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-      void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-      void* var_41 = tensorRelu(var_40); 
-      void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); 
-      void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-      void* var_44 = tensorRelu(var_43); 
-      void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-      void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-      void* var_47 = tensorRelu(var_46); 
-      void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); 
-      void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-      void* var_50 = tensorRelu(var_49); 
-      void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-      void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-      void* var_53 = tensorRelu(var_52); 
-      void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); 
-      void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-      void* var_56 = tensorRelu(var_55); 
-      void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-      void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-      void* var_59 = tensorRelu(var_58); 
-      void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); 
-      void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-      void* var_62 = tensorRelu(var_61); 
-      void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-      void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-      void* var_65 = tensorRelu(var_64); 
-      void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); 
-      void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-      void* var_68 = tensorRelu(var_67); 
-      void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-      void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-      void* var_71 = tensorRelu(var_70); 
-      void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); 
-      void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-      void* var_74 = tensorRelu(var_73); 
-      void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-      void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-      void* var_77 = tensorRelu(var_76); 
-      void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); 
-      void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-      void* var_80 = tensorRelu(var_79); 
-      void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2); 
-      void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); 
-      void* var_83 = tensorSoftmax(var_82); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_83); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-
-
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc
deleted file mode 100644
index f696bbf259b26eb4e45b73aa05658f3208c6fae6..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* motionblur_out = ConvLayer_PROMISE(emboss_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    //dumpOutput(result);
-
-    computePSNRViolation(golden_output, result, PSNR);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc
deleted file mode 100644
index a8d049f7aca85fbc00e7bcd2b47c083d4f6ea377..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_valid.bin",
-					   float_type,
-					   test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin",
-				     float_type,
-				     test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* motionblur_out = ConvLayer_PROMISE(emboss_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    //dumpOutput(result, "GEMO_valid.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-    
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc
deleted file mode 100644
index 26ab88f81a70e032a723537864fb5eb4fead6a5b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* outline_out = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-
-    computePSNRViolation(golden_output, result, PSNR);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc
deleted file mode 100644
index 1fd996f701664358d408f3b7e7a095a66c78f9ef..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_valid.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* outline_out = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    //dumpOutput(result, "GEOM_valid.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc
deleted file mode 100644
index 577145a01eb3e5e941588fbfddf153c78156dc0c..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result, "GEO_approx.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc
deleted file mode 100644
index 6f269a8ad51734e372ce14dd5c5b94342417e2b2..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_valid.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    // NOTE: Filter descriptors do NOT have batch size
-    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-    // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result, "GEO_40_psnr.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc
deleted file mode 100644
index d6150a9ee4b5d99eaa736a911e0fc9da2d593150..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-				     float_type,
-				     test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* motionblur_out = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    hpvm_request_tensor(result, 0);
-    //dumpOutput(result);
-
-    computePSNRViolation(golden_output, result, PSNR);
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc
deleted file mode 100644
index 8c2a1b9f63d41654bf6425a5670cb41fd64b64cf..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_valid.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin",
-				     float_type,
-				     test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* motionblur_out = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result, "GSME_valid_20db.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc
deleted file mode 100644
index 59077e94a918a8d5540b713c08af5eb6e73cb86f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSM_calib.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result, "GSM_approx.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc
deleted file mode 100644
index 45b38e82864b97be220eecbe91ce3d6bfdce6318..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n");
-
-  int test_batch_size = 1000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSM_valid.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    // NOTE: Filter descriptors do NOT have batch size
-    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-    // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result, "GSM_valid_30db.bin");
-
-    computePSNRViolation(golden_output, result, PSNR);
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc
deleted file mode 100644
index 7dd70134731c4fc13d6b7ca239f0566942c02885..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testPipeline(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  printf("********* Pipeline: Gaussian - Outline - Motion Blur - Emboss ********** \n");
-
-  int test_batch_size = 2000;
-  int H = 240;
-  int W = 300;
-  float PSNR = 30;
-
-  void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-G-O-M-E-FP32-clipped-2000.bin",
-                                        float_type,
-                                        test_batch_size, 1, H, W);
-
-  clearTensorMap();
-  for(int i = 0; i < total_runs; i++){
-    void* input = readTrainedWeights("../model_params/pipeline/dataset/caltech101_255_float32.bin",
-                                          float_type,
-                                          test_batch_size, 1, H, W);
-
-    // NOTE: Filter descriptors do NOT have batch size
-    // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-    // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-    void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin",
-                                            float_type, 1, 1, 3, 3);
-    void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin",
-                                            float_type, 1, 1, 9, 9);
-    void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin",
-                                            float_type, 1, 1, 5, 5);
-    void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin",
-                                            float_type, 1, 1, 1, 1);
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-        abort();
-      }
-
-      close(fd);
-    }
-
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-
-    void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-    printf("Gaussian done\n");
-    void* outline_out = ConvLayer_PROMISE(gaussian_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0,
-                                           1, 1, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* motionblur_out = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0,
-                                           4, 4, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128,
-                                           2, 2, 1, 1,
-                                           0, 0, // pool? no pooling needed
-                                           2,
-                                           0, 255, // out min max? should we assume 0 - 255 for all filters.
-                                                   // Will have to rerun to generate golden output
-                                           9);
-
-    hpvm_request_tensor(result, 0);
-    dumpOutput(result);
-
-    computePSNRViolation(golden_output, result, PSNR);
-    freeOutputTensors();
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-        printf("Invalid descriptor \n");
-        abort();
-      }
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-  }
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testPipeline();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc
deleted file mode 100644
index 7b7f989c16d9203778a602bc03b79a5d41c7a3ba..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-llvm_hpvm_initTensorRt(0); 
-
-
-
-std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/"); 
-std::string input_path =  dir_prefix + std::string("input.bin"); 
-void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); 
-std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -0.71850556, 0.79279953, conv2d_1_b, -0.2551266, 0.14472985, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.2546353, 9); 
-void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 2.2546353, conv2d_2_w, -0.5433847, 0.5556715, conv2d_2_b, -0.19323121, 0.20603828, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.6603086, 9); 
-void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 3.6603086, conv2d_3_w, -0.541787, 0.51889443, conv2d_3_b, -0.2030649, 0.21818772, 1, 1, 1, 1, -1, 0, -1, -5.471612, 5.295037, 9); 
-void* var_3 = tensorAdd(var_0, var_2); 
-void* var_4 = tensorRelu(var_3); 
-void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 6.738059, conv2d_4_w, -0.691922, 0.3410589, conv2d_4_b, -0.5095374, 0.18683507, 1, 1, 1, 1, -1, 0, 1, 0.0, 5.2085133, 9); 
-void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.2085133, conv2d_5_w, -0.40904462, 0.39255425, conv2d_5_b, -0.2069035, 0.117769495, 1, 1, 1, 1, -1, 0, -1, -5.6378636, 6.844163, 9); 
-void* var_7 = tensorAdd(var_4, var_6); 
-void* var_8 = tensorRelu(var_7); 
-void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 8.4156885, conv2d_6_w, -0.38497055, 0.3736088, conv2d_6_b, -0.14458452, 0.18792383, 1, 1, 1, 1, -1, 0, 1, 0.0, 6.5020022, 9); 
-void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 6.5020022, conv2d_7_w, -0.30858195, 0.4282964, conv2d_7_b, -0.1807645, 0.07482771, 1, 1, 1, 1, -1, 0, -1, -3.3083274, 5.364109, 9); 
-void* var_11 = tensorAdd(var_8, var_10); 
-void* var_12 = tensorRelu(var_11); 
-void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 9.382513, conv2d_8_w, -0.5838584, 0.44527876, conv2d_8_b, -0.2637087, 0.22768898, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.158108, 9); 
-void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 12.158108, conv2d_9_w, -0.46162197, 0.42936426, conv2d_9_b, -0.1289545, 0.51804763, 1, 1, 1, 1, -1, 0, -1, -17.15394, 12.169734, 9); 
-void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 9.382513, conv2d_10_w, -0.69971406, 0.99415976, conv2d_10_b, -0.1289545, 0.51804763, 0, 0, 2, 2, -1, 0, -1, -5.418469, 11.448848, 9); 
-void* var_16 = tensorAdd(var_15, var_14); 
-void* var_17 = tensorRelu(var_16); 
-void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 18.46502, conv2d_11_w, -0.43470153, 0.36867705, conv2d_11_b, -0.51738244, 0.15350178, 1, 1, 1, 1, -1, 0, 1, 0.0, 13.211603, 9); 
-void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 13.211603, conv2d_12_w, -0.29948497, 0.38820583, conv2d_12_b, -0.37389848, 0.26664862, 1, 1, 1, 1, -1, 0, -1, -10.085186, 13.555471, 9); 
-void* var_20 = tensorAdd(var_17, var_19); 
-void* var_21 = tensorRelu(var_20); 
-void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 22.695429, conv2d_13_w, -0.44317818, 0.30531815, conv2d_13_b, -0.36851564, 0.06573071, 1, 1, 1, 1, -1, 0, 1, 0.0, 19.886229, 9); 
-void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 19.886229, conv2d_14_w, -0.3271309, 0.33153397, conv2d_14_b, -0.38927156, 0.066472165, 1, 1, 1, 1, -1, 0, -1, -8.295334, 15.001421, 9); 
-void* var_24 = tensorAdd(var_21, var_23); 
-void* var_25 = tensorRelu(var_24); 
-void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 28.637527, conv2d_15_w, -0.44983515, 0.43999374, conv2d_15_b, -0.21998975, 0.36213604, 1, 1, 2, 2, -1, 0, 1, 0.0, 44.106163, 9); 
-void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 44.106163, conv2d_16_w, -0.4508994, 0.41697323, conv2d_16_b, -0.27649263, 0.42242092, 1, 1, 1, 1, -1, 0, -1, -47.52727, 75.15572, 9); 
-void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 28.637527, conv2d_17_w, -0.57827795, 0.7829617, conv2d_17_b, -0.27649248, 0.42242065, 0, 0, 2, 2, -1, 0, -1, -8.998529, 10.628808, 9); 
-void* var_29 = tensorAdd(var_28, var_27); 
-void* var_30 = tensorRelu(var_29); 
-void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 77.00688, conv2d_18_w, -0.37020415, 0.4076619, conv2d_18_b, -0.572569, 0.288411, 1, 1, 1, 1, -1, 0, 1, 0.0, 58.209835, 9); 
-void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 58.209835, conv2d_19_w, -0.40217596, 0.4803875, conv2d_19_b, -0.84837836, 0.41470897, 1, 1, 1, 1, -1, 0, -1, -61.702118, 45.982677, 9); 
-void* var_33 = tensorAdd(var_30, var_32); 
-void* var_34 = tensorRelu(var_33); 
-void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 98.688995, conv2d_20_w, -0.41761914, 0.4584275, conv2d_20_b, -0.335136, 0.42988807, 1, 1, 1, 1, -1, 0, 1, 0.0, 95.06278, 9); 
-void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 95.06278, conv2d_21_w, -0.32336038, 0.35931262, conv2d_21_b, -0.645176, 0.45402992, 1, 1, 1, 1, -1, 0, -1, -116.62798, 127.2517, 9); 
-void* var_37 = tensorAdd(var_34, var_36); 
-void* var_38 = tensorRelu(var_37); 
-void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); 
-void* var_40 = FCLayer_PROMISE(var_39, 0.0, 22.762705, dense_1_w, -0.876813, 0.6065728, dense_1_b, -0.36824417, 0.25160706, -1, -30.914663, 49.802082, 9); 
-void* var_41 = tensorSoftmax(var_40); 
-
-computeAccuracy2(labels,10000,var_41); 
-
-llvm_hpvm_cleanupTensorRt(); 
-
-return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc
deleted file mode 100644
index 63aef3744fabc598ccc6653534074283edecef03..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc
+++ /dev/null
@@ -1,189 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  int total_runs = 20; 
-  for (int i = 0 ; i < total_runs; i++){ 
-
-    startMemTracking(); 
-
-    int test_input_size = 5000; 
-    int batch_size = 2500;
-    int offset = 0;
-    int batch_count = test_input_size / batch_size; 
-    float final_accuracy = 0.0; 
-
-    for(int i = 0; i < batch_count; i++){ 
-
-      std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/");	   
-      std::string input_path =  dir_prefix + std::string("input.bin"); 
-      std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-      std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-      void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-      std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-      void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-      void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-      void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-      void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-      void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-      void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-      void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-      void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-      void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-      void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-      void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-      void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-      std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-      void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-      std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-      void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-      std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-      void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-      void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-      std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-      void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-      void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-      void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-      void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-      void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-      void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-      void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-      void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-      void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-      void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-      std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-      void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-      std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-      void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-      std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-      void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-      void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-      std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-      void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-      void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-      void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-      void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-      void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-      void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-      void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-      void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-      void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-      std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-      void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-      std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-      void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-      std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-      void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-      std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-      void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-      int start = i * batch_size + offset; 
-      int end = (i + 1) * batch_size + offset; 
-
-      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-      void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -1.0248864, 1.2929907, conv2d_1_b, -0.36291853, 0.2533059, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.8791630274057383, 9); 
-      void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 0.8791630274057383, conv2d_2_w, -0.69884616, 0.71849966, conv2d_2_b, -0.2781147, 0.45571187, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1859495645761484, 9); 
-      void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1.1859495645761484, conv2d_3_w, -0.59568167, 0.7714691, conv2d_3_b, -0.8602873, 0.19743633, 1, 1, 1, 1, -1, 0, -1, -2.2316832554340365, 2.266301159858699, 9); 
-      void* var_3 = tensorAdd(var_0, var_2); 
-      void* var_4 = tensorRelu(var_3); 
-      void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.789569139480591, conv2d_4_w, -0.41976976, 0.43748936, conv2d_4_b, -0.7021962, 0.3033103, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3341254055499974, 9); 
-      void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 1.3341254055499974, conv2d_5_w, -0.46757826, 0.4635873, conv2d_5_b, -0.20662616, 0.1778044, 1, 1, 1, 1, -1, 0, -1, -0.9912706619501114, 1.0245310074090952, 9); 
-      void* var_7 = tensorAdd(var_4, var_6); 
-      void* var_8 = tensorRelu(var_7); 
-      void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 2.998989346027372, conv2d_6_w, -0.64404047, 0.45383143, conv2d_6_b, -0.819547, 0.38550296, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2850778144597967, 9); 
-      void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1.2850778144597967, conv2d_7_w, -0.41986948, 0.33654243, conv2d_7_b, -0.3563013, 0.22371122, 1, 1, 1, 1, -1, 0, -1, -1.2940701305866242, 0.7332147359848022, 9); 
-      void* var_11 = tensorAdd(var_8, var_10); 
-      void* var_12 = tensorRelu(var_11); 
-      void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_8_w, -0.4805263, 0.50655717, conv2d_8_b, -0.296758, 0.7742441, 1, 1, 2, 2, -1, 0, 1, 0.0, 3.6232483506202584, 9); 
-      void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 3.6232483506202584, conv2d_9_w, -0.52083415, 0.45517674, conv2d_9_b, -0.20242067, 0.8236838, 1, 1, 1, 1, -1, 0, -1, -6.319877154827118, 6.882811555862418, 9); 
-      void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_10_w, -0.5338656, 1.3395424, conv2d_10_b, -0.20242067, 0.8236838, 0, 0, 2, 2, -1, 0, -1, -0.9930689406394959, 2.8721754658222096, 9); 
-      void* var_16 = tensorAdd(var_15, var_14); 
-      void* var_17 = tensorRelu(var_16); 
-      void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 8.315858840942383, conv2d_11_w, -0.34429058, 0.43629733, conv2d_11_b, -1.0744808, 0.056708273, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.6893706333637226, 9); 
-      void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 2.6893706333637226, conv2d_12_w, -0.30342352, 0.39493486, conv2d_12_b, -0.44630566, 0.6492069, 1, 1, 1, 1, -1, 0, -1, -1.8801953810453416, 1.714934362173068, 9); 
-      void* var_20 = tensorAdd(var_17, var_19); 
-      void* var_21 = tensorRelu(var_20); 
-      void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 8.381670951843262, conv2d_13_w, -0.38351893, 0.45775774, conv2d_13_b, -1.4733055, -0.014426912, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.569231034517287, 9); 
-      void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 2.569231034517287, conv2d_14_w, -0.25695276, 0.45372736, conv2d_14_b, -0.5259744, 0.26591402, 1, 1, 1, 1, -1, 0, -1, -1.9701244848966597, 1.4661400413513093, 9); 
-      void* var_24 = tensorAdd(var_21, var_23); 
-      void* var_25 = tensorRelu(var_24); 
-      void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_15_w, -0.55299705, 0.5443531, conv2d_15_b, -0.71790683, 1.2730768, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.411911067962677, 9); 
-      void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 12.411911067962677, conv2d_16_w, -0.4203967, 0.48641303, conv2d_16_b, -0.90653443, 1.3546854, 1, 1, 1, 1, -1, 0, -1, -25.407194147109987, 20.519153985977383, 9); 
-      void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_17_w, -0.4365755, 0.84913826, conv2d_17_b, -0.90653443, 1.3546851, 0, 0, 2, 2, -1, 0, -1, -4.256520752906799, 5.730506427288059, 9); 
-      void* var_29 = tensorAdd(var_28, var_27); 
-      void* var_30 = tensorRelu(var_29); 
-      void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 22.350475664138983, conv2d_18_w, -0.38657624, 0.5228989, conv2d_18_b, -1.2083547, 0.76361173, 1, 1, 1, 1, -1, 0, 1, 0.0, 23.93387042045599, 9); 
-      void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 23.93387042045599, conv2d_19_w, -0.40857902, 0.575035, conv2d_19_b, -1.8731614, 1.0960501, 1, 1, 1, 1, -1, 0, -1, -35.37134181976318, 19.209569931030273, 9); 
-      void* var_33 = tensorAdd(var_30, var_32); 
-      void* var_34 = tensorRelu(var_33); 
-      void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 29.434949998855657, conv2d_20_w, -0.33079496, 0.5893278, conv2d_20_b, -1.0234511, 1.0016295, 1, 1, 1, 1, -1, 0, 1, 0.0, 27.216757345199866, 9); 
-      void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 27.216757345199866, conv2d_21_w, -0.27897888, 0.38280907, conv2d_21_b, -2.2086356, 1.0066502, 1, 1, 1, 1, -1, 0, -1, -42.31447326660156, 29.365212144852038, 9); 
-      void* var_37 = tensorAdd(var_34, var_36); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); 
-      void* var_40 = FCLayer_PROMISE(var_39, 0.0, 13.736315393447876, dense_1_w, -1.5092047, 1.0279838, dense_1_b, -0.49379802, 0.61032647, -1, -45.52749088287353, 31.64324799537669, 9); 
-      void* var_41 = tensorSoftmax(var_40); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, var_41); 
-      final_accuracy += accuracy; 
-      freeBatchMemory(); 
- 
-    }
-
-    final_accuracy = final_accuracy / batch_count; 
-    dumpFinalAccuracy(final_accuracy); 
-
-
-  }
-
-  dumpExecutionAccuracies(); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc
deleted file mode 100644
index bbc247fc46fa553a2d8fb479c77023c9960375d6..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
-llvm_hpvm_initTensorRt(0); 
-
- int total_runs = 1; 
- for (int i = 0 ; i < total_runs; i++){ 
-
-   startMemTracking(); 
-
-   int test_input_size = 10000; 
-   int batch_size = 2500; 
-   int batch_count = test_input_size / batch_size; 
-   float final_accuracy = 0.0; 
-
-   for(int i = 0; i < batch_count; i++){ 
-
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-     void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-     std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-     void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-     void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-     std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-     void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-     void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-     std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-     void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-     void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-     std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-     void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-     void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-     std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-     void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-     void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-     void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-     void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-     void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-     void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-     std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-     void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-     void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-     void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-     void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-     void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-     void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-     void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-     void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-     void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-     void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-     void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-     void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-     std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-     void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-     void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-     std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-     void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-     int start = i * batch_size; 
-     int end = (i + 1) * batch_size; 
-
-     void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-     void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); 
-     void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); 
-     void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); 
-     void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); 
-     void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); 
-     void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); 
-     void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); 
-     void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); 
-     void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); 
-     void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); 
-     void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); 
-     void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); 
-     void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); 
-     void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 2.8148007798194876, 9); 
-     void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); 
-     void* var_15 = tensorSoftmax(var_14); 
-
-     uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-     float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); 
-     final_accuracy += accuracy; 
-     freeBatchMemory(); 
- 
-   }
-
-   final_accuracy = final_accuracy / batch_count; 
-   dumpFinalAccuracy(final_accuracy); 
- }
-
- dumpExecutionAccuracies(); 
-
- llvm_hpvm_cleanupTensorRt(); 
-
- return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc
deleted file mode 100644
index 21eb3ba3e09a7a8ef5ae8940d4c60501ac01abe1..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
- llvm_hpvm_initTensorRt(2); 
-
- int total_runs = 20; 
- for (int i = 0 ; i < total_runs; i++){ 
-
-   startMemTracking(); 
-
-   int test_input_size = 5000; 
-   int batch_size = 5000;
-   int offset = 0;
-   int batch_count = test_input_size / batch_size; 
-   float final_accuracy = 0.0; 
-
-   for(int i = 0; i < batch_count; i++){ 
-
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-     void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-     std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-     void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-     void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-     std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-     void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-     void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-     std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-     void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-     void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-     std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-     void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-     void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-     std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-     void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-     void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-     void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-     void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-     void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-     void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-     std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-     void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-     void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-     void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-     void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-     void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-     void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-     void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-     void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-     void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-     void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-     void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-     void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-     std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-     void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-     void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-     std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-     void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-     int start = i * batch_size + offset; 
-     int end = (i + 1) * batch_size + offset; 
-
-     void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-     void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); 
-     void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); 
-     void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); 
-     void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); 
-     void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); 
-     void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); 
-     void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); 
-     void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); 
-     void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); 
-     void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); 
-     void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); 
-     void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); 
-     void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); 
-     void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 2.8148007798194876, 9); 
-     void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); 
-     void* var_15 = tensorSoftmax(var_14); 
-
-     uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-     //-- float accuracy = computeAccuracy2(labels, batch_size, var_15, 100);
-     float accuracy = computeTop5Accuracy(labels, batch_size, var_15, 100);
-
-     final_accuracy += accuracy; 
-     freeBatchMemory(); 
- 
-   }
-
-   final_accuracy = final_accuracy / batch_count; 
-   dumpFinalAccuracy(final_accuracy); 
- }
-
- dumpExecutionAccuracies(); 
-
- llvm_hpvm_cleanupTensorRt(); 
-
- return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc
deleted file mode 100644
index b78c506e618535be50e92d9e77a59ecdd793d720..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
- llvm_hpvm_initTensorRt(0); 
-
- int total_runs = 20; 
- for (int i = 0 ; i < total_runs; i++){ 
-
-   startMemTracking(); 
-
-   int test_input_size = 5000; 
-   int batch_size = 2500;
-   int offset = 0;
-   int batch_count = test_input_size / batch_size; 
-   float final_accuracy = 0.0; 
-
-   for(int i = 0; i < batch_count; i++){ 
-
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-     void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-     std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-     void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-     void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-     std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-     void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-     void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-     std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-     void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-     void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-     std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-     void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-     void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-     std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-     void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-     void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-     void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-     void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-     void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-     void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-     std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-     void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-     void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-     void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-     void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-     void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-     void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-     void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-     void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-     void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-     void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-     void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-     void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-     std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-     void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-     void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-     std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-     void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-     int start = i * batch_size + offset; 
-     int end = (i + 1) * batch_size + offset; 
-
-     void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-     void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); 
-     void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); 
-     void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); 
-     void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); 
-     void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); 
-     void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); 
-     void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); 
-     void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); 
-     void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); 
-     void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); 
-     void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); 
-     void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); 
-     void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); 
-     void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 2.8148007798194876, 9); 
-     void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); 
-     void* var_15 = tensorSoftmax(var_14); 
-
-     uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-     float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); 
-     final_accuracy += accuracy; 
-     freeBatchMemory(); 
- 
-   }
-
-   final_accuracy = final_accuracy / batch_count; 
-   dumpFinalAccuracy(final_accuracy); 
- }
-
- dumpExecutionAccuracies(); 
-
- llvm_hpvm_cleanupTensorRt(); 
-
- return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc
deleted file mode 100644
index fbaea86634e2b561f78fd3971a731b1a734dcbaf..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../../tensor_runtime/include/tensor_runtime.h" 
-#include "../../include/utils.h" 
-
-int main(){ 
-
- llvm_hpvm_initTensorRt(0); 
-
- int total_runs = 20; 
- for (int i = 0 ; i < total_runs; i++){ 
-
-   startMemTracking(); 
-
-   int test_input_size = 5000; 
-   int batch_size = 2500;
-   int offset = 0;
-   int batch_count = test_input_size / batch_size; 
-   float final_accuracy = 0.0; 
-
-   for(int i = 0; i < batch_count; i++){ 
-
-     std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/");
-       
-     std::string input_path =  dir_prefix + std::string("input.bin"); 
-     std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-     std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-     void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-     std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-     void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-     void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-     std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-     void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-     std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-     void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-     std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-     void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-     void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-     std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-     void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-     std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-     void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-     std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-     void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-     void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-     void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-     void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-     std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-     void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-     std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-     void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-     std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-     void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-     void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-     void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-     void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-     void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-     void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-     void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-     void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-     void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-     std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-     void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-     std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-     void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-     void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-     std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-     void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-     std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-     void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-     std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-     void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-     int start = i * batch_size + offset; 
-     int end = (i + 1) * batch_size + offset; 
-
-     void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-     void* var_0 = ConvLayer_PROMISE(input, -1.8816367, 2.0934217, conv2d_1_w, -0.53275156, 0.49437004, conv2d_1_b, -0.6403629, 0.2490165, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3590874671936035, 9); 
-     void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.3590874671936035, conv2d_2_w, -0.2688396, 0.20639156, conv2d_2_b, -0.7745511, 0.82006615, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.521231179237361, 9); 
-     void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 2.521231179237361, conv2d_3_w, -0.16776876, 0.14878987, conv2d_3_b, -0.35283303, 0.5154362, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2011985784769053, 9); 
-     void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1.2011985784769053, conv2d_4_w, -0.088948585, 0.114222586, conv2d_4_b, -0.30250227, 0.36856708, 1, 1, 1, 1, 0, 2, 1, 0.0, 1.0359880930185312, 9); 
-     void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 1.0359880930185312, conv2d_5_w, -0.07739562, 0.10973293, conv2d_5_b, -0.15568458, 0.17634983, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.3004955950379369, 9); 
-     void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 0.3004955950379369, conv2d_6_w, -0.051649556, 0.05435231, conv2d_6_b, -0.07395447, 0.07996062, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.11490475405007583, 9); 
-     void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 0.11490475405007583, conv2d_7_w, -0.043513633, 0.07577866, conv2d_7_b, -0.06921874, 0.02660573, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.16232508487999475, 9); 
-     void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 0.16232508487999475, conv2d_8_w, -0.033842053, 0.045218028, conv2d_8_b, -0.022827804, 0.023845317, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.12424996573477909, 9); 
-     void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.12424996573477909, conv2d_9_w, -0.02211613, 0.032084666, conv2d_9_b, -0.02699063, 0.03773564, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.1746344865113496, 9); 
-     void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.1746344865113496, conv2d_10_w, -0.01979376, 0.034854397, conv2d_10_b, -0.036107242, 0.07056531, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.5751757621765137, 9); 
-     void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.5751757621765137, conv2d_11_w, -0.03452098, 0.046055835, conv2d_11_b, -0.051925894, 0.07039055, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7718751144409115, 9); 
-     void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.7718751144409115, conv2d_12_w, -0.025946895, 0.040090334, conv2d_12_b, -0.06049362, 0.12658806, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1728516906499844, 9); 
-     void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1728516906499844, conv2d_13_w, -0.021766115, 0.03315237, conv2d_13_b, -0.20705001, 0.117947325, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.0015769386291495, 9); 
-     void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.0015769386291495, dense_1_w, -0.042597745, 0.046707444, dense_1_b, -0.21937433, 0.2545502, 1, 0.0, 2.002361118793486, 9); 
-     void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.002361118793486, dense_2_w, -0.32550547, 0.30829763, dense_2_b, -1.1787822, 1.2378151, -1, -18.251470546722413, 24.17363445281988, 9); 
-     void* var_15 = tensorSoftmax(var_14); 
-
-     uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-     float accuracy = computeAccuracy2(labels, batch_size, var_15); 
-     final_accuracy += accuracy; 
-     freeBatchMemory(); 
- 
-   }
-
-   final_accuracy = final_accuracy / batch_count; 
-   dumpFinalAccuracy(final_accuracy); 
-
-
- }
-
- dumpExecutionAccuracies(); 
-
- llvm_hpvm_cleanupTensorRt(); 
-
- return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc
deleted file mode 100644
index ecbfa322e9a26159e8a0bb6dad7b9cfa75f53711..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc
+++ /dev/null
@@ -1,154 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-
-  startMemTracking();
-
-  int test_input_size = 10000;
-  int batch_size = 2500;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-    // Perforates 50% filter elements and corresponding computations
-    void* conv1out = tensorConvApproxHalf2(input, conv1_filter, 1, 1, 1, 1,
-    					 conv_mode, conv_precision, 1, 1, 2, 1);
-  
-
-    tensorAdd(conv1out, conv1_bias); 
-    void* conv1_tanh = tensorTanh(conv1out);
-
-    // NOTE: Perforates 50% rows in the output
-    // 2nd Layer
-    void* conv2out = tensorConvApproxHalf2(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 2, 1, 1, 1);
-    tensorAdd(conv2out, conv2_bias); 
-    void* conv2_tanh = tensorTanh(conv2out);
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // NOTE: No Approxmation - all 1s passed
-    // 3rd Layer
-    void* conv3out = tensorConvApproxHalf2(pool2out, conv3_filter, 1, 1, 1, 1,
-					  conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv3out, conv3_bias); 
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    // NOTE: No Approxmation - all 1s passed
-    // 4th Layer
-    void* conv4out = tensorConvApproxHalf2(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv4out, conv4_bias); 
-    void* conv4_tanh = tensorTanh(conv4out);
-    void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // NOTE: No Approxmation - all 1s passed
-    // 5th Layer
-    void* conv5out = tensorConvApproxHalf2(pool4out, conv5_filter, 1, 1, 1, 1,
-					  conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv5out, conv5_bias); 
-    void* conv5_tanh = tensorTanh(conv5out);
-
-    // NOTE: No Approxmation - all 1s passed
-    // 6th Layer
-    void* conv6out = tensorConvApproxHalf2(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv6out, conv6_bias); 
-  
-    void* conv6_tanh = tensorTanh(conv6out);
-    void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-    // final FC Layer
-    void* gemm1out = tensorGemmGPU(pool6out, fc1_weights);  
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-    void* result = tensorSoftmax(gemm1biasout);
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, result); 
-    final_accuracy += accuracy;
-    
-    freeBatchMemory();
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc
deleted file mode 100644
index a0ac48b5ef41002760c2ad6cd8882639e98f1699..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-
-  startMemTracking();
-
-  int test_input_size = 10000;
-  int batch_size = 2500;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-
-    void* conv1out = tensorConvSampSim(input, conv1_filter, 1, 1, 1, 1,
-    					 conv_mode, conv_precision, 2, 0);
-   
-
-    tensorAdd(conv1out, conv1_bias); 
-    void* conv1_tanh = tensorTanh(conv1out);
-    
-    // 2nd Layer
-    void* conv2out = tensorConvApproxHalf(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv2out, conv2_bias); 
-    void* conv2_tanh = tensorTanh(conv2out);
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-     
-    // 3rd Layer
-    void* conv3out = tensorConvApproxHalf(pool2out, conv3_filter, 1, 1, 1, 1,
-					  conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv3out, conv3_bias); 
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    // 4th Layer
-    void* conv4out = tensorConvApproxHalf(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv4out, conv4_bias); 
-    void* conv4_tanh = tensorTanh(conv4out);
-    void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-    // 5th Layer
-    void* conv5out = tensorConvApproxHalf(pool4out, conv5_filter, 1, 1, 1, 1,
-					  conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv5out, conv5_bias); 
-    void* conv5_tanh = tensorTanh(conv5out);
-
-    // 6th Layer
-    void* conv6out = tensorConvApproxHalf(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 1, 1, 1, 1);
-    tensorAdd(conv6out, conv6_bias); 
-  
-    void* conv6_tanh = tensorTanh(conv6out);
-    void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-    // final FC Layer
-    void* gemm1out = tensorGemmGPU(pool6out, fc1_weights);  
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-    void* result = tensorSoftmax(gemm1biasout);
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, result); 
-    final_accuracy += accuracy;
-    
-    freeBatchMemory();
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-}
-
-
-int main(int argc, char* argv[]){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc
deleted file mode 100644
index 7713b8105ac0f9bc6f1dae6899548599e5ede0ce..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc
+++ /dev/null
@@ -1,196 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  int total_runs = 100;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* CIFAR-10 DNN ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 1000; //5000
-
-  //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
-  uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
-    
-  void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin",
-			  	   float_type,
-				   test_batch_size, 3, 32, 32);
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin",
-					  float_type, 64, 3, 11, 11);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin",
-					  float_type, 192, 64, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin",
-					float_type, 1, 192, 1, 1);
-
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin",
-					  float_type, 384, 192, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin",
-					float_type, 1, 384, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin",
-					  float_type, 256, 384, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin",
-					float_type, 1, 256, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin",
-					  float_type, 256, 256, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin",
-					float_type, 1, 256, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin",
-					 float_type, 1, 1, 4096, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvPerf(input, conv1_filter, 5, 5, 1, 1,
-				    conv_mode, conv_precision, 0, 0);
-
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* conv1_tanh = tensorTanh(conv1out);
-
-    void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // 2nd Layer
-    void* conv2out = tensorConvPerf(pool1out, conv2_filter, 2, 2, 1, 1,
-				    conv_mode, conv_precision, 0, 0);
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* conv2_tanh = tensorTanh(conv2out);
-
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-      
-
-    // 3rd Layer
-    void* conv3out = tensorConvPerf(pool2out, conv3_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 0, 0);
-    tensorAdd(conv3out, conv3_bias); // NOTE: In place operation
-  
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    // 4th Layer
-    void* conv4out = tensorConvPerf(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-					     conv_mode, conv_precision, 2, 0);
-    tensorAdd(conv4out, conv4_bias); // NOTE: In place operation
-  
-    void* conv4_tanh = tensorTanh(conv4out);
-    
-    // 5th Layer
-    void* conv5out = tensorConvPerf(conv4_tanh, conv5_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision, 0, 0);
-    tensorAdd(conv5out, conv5_bias); // NOTE: In place operation
-  
-    void* conv5_tanh = tensorTanh(conv5out);
-
-    void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // final FC Layer
-    void* gemm1out = tensorGemmGPU(pool5out, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* result = tensorSoftmax(gemm1biasout);
-
-    printTensorDims(result);
-    
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc
deleted file mode 100644
index 3e5cec7d0760252ebff1b31293a51bdf570415f4..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc
+++ /dev/null
@@ -1,196 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-
-  int total_runs = 100;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* CIFAR-10 DNN ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size);
-  uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size);
-    
-  void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin",
-			  	   float_type,
-				   test_batch_size, 3, 32, 32);
-
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin",
-					  float_type, 64, 3, 11, 11);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin",
-					  float_type, 192, 64, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin",
-					float_type, 1, 192, 1, 1);
-
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin",
-					  float_type, 384, 192, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin",
-					float_type, 1, 384, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin",
-					  float_type, 256, 384, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin",
-					float_type, 1, 256, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin",
-					  float_type, 256, 256, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin",
-					float_type, 1, 256, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin",
-					 float_type, 1, 1, 4096, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvolution(input, conv1_filter, 5, 5, 1, 1,
-				       conv_mode, conv_precision);
-
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* conv1_tanh = tensorTanh(conv1out);
-
-    void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // 2nd Layer
-    void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* conv2_tanh = tensorTanh(conv2out);
-
-    void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-      
-
-    // 3rd Layer
-    void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv3out, conv3_bias); // NOTE: In place operation
-  
-    void* conv3_tanh = tensorTanh(conv3out);
-
-    // 4th Layer
-    void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv4out, conv4_bias); // NOTE: In place operation
-  
-    void* conv4_tanh = tensorTanh(conv4out);
-    
-    // 5th Layer
-    void* conv5out = tensorConvolution(conv4_tanh, conv5_filter, 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv5out, conv5_bias); // NOTE: In place operation
-  
-    void* conv5_tanh = tensorTanh(conv5out);
-
-    void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2);
-
-    // final FC Layer
-    void* gemm1out = tensorGemmGPU(pool5out, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* result = tensorSoftmax(gemm1biasout);
-
-    printTensorDims(result);
-    
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      const char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(1);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc
deleted file mode 100644
index 84710565de3b2fdde6eca5d84c9e3f324eba1d50..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/depthwise_batchnorm2/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string depthwise_conv2d_1_b_path =  dir_prefix + std::string("depthwise_conv2d_1_b.bin"); 
-  void* depthwise_conv2d_1_b =  readTrainedWeights(depthwise_conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,6272,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 10000; 
-  int batch_size = 10000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_3 = tensorBatchNorm(var_2,batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_4 = tensorPooling(var_3,0,2,2,0,0,2,2); 
-    void* var_5 = tensorConvolution(var_4, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_6 = tensorAdd(var_5, depthwise_conv2d_1_b); 
-    void* var_7 = tensorRelu(var_6); 
-    void* var_8 = tensorBatchNorm(var_7,batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_10 = tensorGemmGPU(var_8, dense_1_w); 
-    void* var_11 = tensorAdd(var_10, dense_1_b); 
-    void* var_12 = tensorRelu(var_11); 
-    void* var_13 = tensorGemmGPU(var_12, dense_2_w); 
-    void* var_14 = tensorAdd(var_13, dense_2_b); 
-    void* var_15 = tensorRelu(var_14); 
-    void* var_16 = tensorSoftmax(var_15); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_16); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc
deleted file mode 100644
index d7addd7283e24bedfc32d57d84c4ce17d9966f57..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-#include "../../include/types.h"
-
-
-
-void test_2_Layer_clipped_FC(){
-
-  printf("********* 2-Layer FC with clipped activations and weights ********* \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;
-  
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-				   float_type, test_batch_size, 1, 28, 28);  
-  void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin",
-					 float_type, 1, 1, 784, 128);  
-  void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin",
-				      float_type, 1, 128, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin",
-					 float_type, 1, 1, 128, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-  // Start execution profiling Tensor ops
-  startProfiling();
-  
-  // Layer-1
-  void* fc1out = tensorGemmGPU(input, fc1_weights);  
-  printTensorDims(fc1out);
-  dumpWeightsToFile("tensors_out2/fc1out.out", fc1out);  
-
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  printTensorDims(fc1_bias_out);
-  dumpWeightsToFile("tensors_out2/fc1_bias.out", fc1_bias_out);  
-
-  void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2);
-  printTensorDims(fc1_relu);
-  dumpWeightsToFile("tensors_out2/fc1_clipped_relu.out", fc1_relu);  
-
-  // Layer-2
-  void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);  
-  printTensorDims(fc2out);
-  dumpWeightsToFile("tensors_out2/fc2out.out", fc2out);  
-
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  printTensorDims(fc2_bias_out);
-
-  void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2);
-  printTensorDims(fc2_relu);
-
-  void* result = tensorSoftmax(fc2_relu);
-  printTensorDims(result);
-
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-}
-
-
-
-int main(){
-
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  test_2_Layer_clipped_FC();
-
-  llvm_hpvm_cleanupTensorRt();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc
deleted file mode 100644
index d2d663552fdab6366f28655ca835ba63cb4fcee4..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-bool Opentuner_run = false;
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetTanh(){
-
-  int total_runs = 1;
-  if(Opentuner_run){
-    total_runs = 1000000;
-  }
-
-  
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 5000;
-
-  uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size);
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  
-  clearTensorMap();
-  
-  for(int i = 0; i < total_runs; i++){
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd = open(myfifo, O_RDONLY);
-
-      int ret_val = fcntl(fd, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-
-      char str[100];
-      read(fd, str, 80);
-      if(strcmp(str, "stop_run") == 0){
-	abort();
-      }
-
-      close(fd);
-    }
-
-    
-    readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters
-
-    // Start power and performnce profiling 
-    startProfiling();
-  
-    int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-    int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-    // NOTE: 'SAME' convolution
-    void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-
-    // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-    tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-
-    void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv1_tanh = tensorTanh(pool1out);
-
-    // NOTE: input channels have to match between tensor op inputs and outputs 
-    void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-    tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-    void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-
-    void* conv2_tanh = tensorTanh(pool2out);
-
-    void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights);  
-
-    void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-
-    void* tanh1out = tensorTanh(gemm1biasout);
-  
-    void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights);  
-  
-    void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-
-    void* tanh2out = tensorTanh(gemm2_biasout);
-  
-    void* result = tensorSoftmax(tanh2out);
-
-    // End profiling and dump output to profile.txt
-    stopProfiling();
-  
-    computeAccuracy2(labels, test_batch_size, result);
-    
-    dumpAccuracyNorms();
-    freeOutputTensors();  
-
-    if(Opentuner_run){
-
-      char* myfifo = "/tmp/myfifo";
-      int fd_out = open(myfifo, O_WRONLY);
-      int ret_val = fcntl(fd_out, F_GETFD);
-      if(ret_val == -1){
-	printf("Invalid descriptor \n");
-	abort();
-      }
-      
-      const char* str = "completed***!\n\0";
-      write(fd_out, str, 80);
-      close(fd_out);
-    }
-    
-  }
-
-
-  
-}
-
-
-int main(int argc, char* argv[]){
-
-  if(argc > 1)
-    Opentuner_run = true;
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenetTanh();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc
deleted file mode 100644
index effb293a8b63119015ed8dbf5f8938eb62f2f89c..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/lenet_front/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv0.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv_bias0.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv_bias2.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("fc5.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("fc_bias5.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("fc6.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("fc_bias6.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 0); 
-  void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-  void* var_2 = tensorTanh(var_1); 
-  void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
-  void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); 
-  void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-  void* var_6 = tensorTanh(var_5); 
-  void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-  void* var_9 = tensorGemmGPU(var_7, dense_1_w); 
-  void* var_10 = tensorAdd(var_9, dense_1_b); 
-  void* var_11 = tensorTanh(var_10); 
-  void* var_12 = tensorGemmGPU(var_11, dense_2_w); 
-  void* var_13 = tensorAdd(var_12, dense_2_b); 
-  void* var_14 = tensorTanh(var_13); 
-  void* var_15 = tensorSoftmax(var_14); 
-
-  computeAccuracy2(labels, 10000, var_15);
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc
deleted file mode 100644
index 03a4137004fe063a4536efec8fa7ecf2d8d2b374..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../../keras/data/lenet_test_8/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 10000; 
-  int batch_size = 10000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
-    void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 1); 
-    void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_8 = tensorConvolution(var_6, conv2d_3_w, 1, 1, 2, 2, 1, 1); 
-    void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_12 = tensorGemmGPU(var_10, dense_1_w); 
-    void* var_13 = tensorAdd(var_12, dense_1_b); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorGemmGPU(var_14, dense_2_w); 
-    void* var_16 = tensorAdd(var_15, dense_2_b); 
-    void* var_17 = tensorRelu(var_16); 
-    void* var_18 = tensorSoftmax(var_17); 
-
-    uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy3(labels, var_18); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc
deleted file mode 100644
index a6b777e36c1b31440a3ad7d227df4915b1cc27df..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-
-int main(){ 
-
-  llvm_hpvm_initializeRuntimeController("tuner_confs.txt", "quant_ranges_rt.txt");
-  llvm_hpvm_initApproxhpvmRt(0);
-  
-
-  std::string dir_prefix = std::string("../model_params/lenet_relu/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  uint8_t* labels = readLabels(labels_path.c_str(),10000); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  void* var_0 = wrapper_ConvLayer("1", input, conv2d_1_w, conv2d_1_b, 2, 2, 1, 1, 0, 2, 1, 0, 0); 
-  void* var_1 = wrapper_ConvLayer("2", var_0, conv2d_2_w, conv2d_2_b, 2, 2, 1, 1, -1, 0, 1, 0, 0); 
-  void* var_2 = wrapper_ConvLayer("3", var_1, conv2d_3_w, conv2d_3_b, 1, 1, 2, 2, -1, 0, 1, 0, 0); 
-  void* var_3 = wrapper_FCLayer("4", var_2, dense_1_w, dense_1_b, 1, 0, 0); 
-  void* var_4 = wrapper_FCLayer("5", var_3, dense_2_w, dense_2_b, 1, 0, 0); 
-  void* var_5 = tensorSoftmax(var_4); 
-
-  computeAccuracy2(labels,10000,var_5); 
-
-  
-  llvm_hpvm_cleanupApproxhpvmRt(); 
-  llvm_hpvm_clearRuntimeController();
-
-  
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc
deleted file mode 100644
index 646582146e1fd4b4819ee47a071d630428ed7f70..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/hpvm_mio/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1600,256); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,256,5); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,5,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 5000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 0, 0, 1, 1, 1, 1); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_3 = tensorConvolution(var_2, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_4 = tensorAdd(var_3, conv2d_2_b); 
-    void* var_5 = tensorRelu(var_4); 
-    void* var_6 = tensorPooling(var_5,0,2,2,0,0,2,2); 
-    void* var_8 = tensorConvolution(var_6, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_11 = tensorConvolution(var_10, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_12 = tensorAdd(var_11, conv2d_4_b); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_14 = tensorPooling(var_13,0,2,2,0,0,2,2); 
-    void* var_17 = tensorGemmGPU(var_14, dense_1_w); 
-    void* var_18 = tensorAdd(var_17, dense_1_b); 
-    void* var_19 = tensorRelu(var_18); 
-    void* var_21 = tensorGemmGPU(var_19, dense_2_w); 
-    void* var_22 = tensorAdd(var_21, dense_2_b); 
-    void* var_23 = tensorSoftmax(var_22); 
-
-    uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy3(labels, var_23); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc
deleted file mode 100644
index ba7af9846916057fedc05757bdad77fefb01590e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc
+++ /dev/null
@@ -1,413 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_hpvm_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_7_w_path =  dir_prefix + std::string("depthwise_conv2d_7_w.bin"); 
-  void* depthwise_conv2d_7_w =  readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_14_gamma_path =  dir_prefix + std::string("batch_normalization_14_gamma.bin"); 
-  void* batch_normalization_14_gamma =  readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_beta_path =  dir_prefix + std::string("batch_normalization_14_beta.bin"); 
-  void* batch_normalization_14_beta =  readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_mean_path =  dir_prefix + std::string("batch_normalization_14_mean.bin"); 
-  void* batch_normalization_14_mean =  readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_14_variance_path =  dir_prefix + std::string("batch_normalization_14_variance.bin"); 
-  void* batch_normalization_14_variance =  readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_15_gamma_path =  dir_prefix + std::string("batch_normalization_15_gamma.bin"); 
-  void* batch_normalization_15_gamma =  readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_beta_path =  dir_prefix + std::string("batch_normalization_15_beta.bin"); 
-  void* batch_normalization_15_beta =  readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_mean_path =  dir_prefix + std::string("batch_normalization_15_mean.bin"); 
-  void* batch_normalization_15_mean =  readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_15_variance_path =  dir_prefix + std::string("batch_normalization_15_variance.bin"); 
-  void* batch_normalization_15_variance =  readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_8_w_path =  dir_prefix + std::string("depthwise_conv2d_8_w.bin"); 
-  void* depthwise_conv2d_8_w =  readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_16_gamma_path =  dir_prefix + std::string("batch_normalization_16_gamma.bin"); 
-  void* batch_normalization_16_gamma =  readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_beta_path =  dir_prefix + std::string("batch_normalization_16_beta.bin"); 
-  void* batch_normalization_16_beta =  readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_mean_path =  dir_prefix + std::string("batch_normalization_16_mean.bin"); 
-  void* batch_normalization_16_mean =  readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_16_variance_path =  dir_prefix + std::string("batch_normalization_16_variance.bin"); 
-  void* batch_normalization_16_variance =  readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_17_gamma_path =  dir_prefix + std::string("batch_normalization_17_gamma.bin"); 
-  void* batch_normalization_17_gamma =  readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_beta_path =  dir_prefix + std::string("batch_normalization_17_beta.bin"); 
-  void* batch_normalization_17_beta =  readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_mean_path =  dir_prefix + std::string("batch_normalization_17_mean.bin"); 
-  void* batch_normalization_17_mean =  readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_17_variance_path =  dir_prefix + std::string("batch_normalization_17_variance.bin"); 
-  void* batch_normalization_17_variance =  readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_9_w_path =  dir_prefix + std::string("depthwise_conv2d_9_w.bin"); 
-  void* depthwise_conv2d_9_w =  readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_18_gamma_path =  dir_prefix + std::string("batch_normalization_18_gamma.bin"); 
-  void* batch_normalization_18_gamma =  readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_beta_path =  dir_prefix + std::string("batch_normalization_18_beta.bin"); 
-  void* batch_normalization_18_beta =  readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_mean_path =  dir_prefix + std::string("batch_normalization_18_mean.bin"); 
-  void* batch_normalization_18_mean =  readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_18_variance_path =  dir_prefix + std::string("batch_normalization_18_variance.bin"); 
-  void* batch_normalization_18_variance =  readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_19_gamma_path =  dir_prefix + std::string("batch_normalization_19_gamma.bin"); 
-  void* batch_normalization_19_gamma =  readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_beta_path =  dir_prefix + std::string("batch_normalization_19_beta.bin"); 
-  void* batch_normalization_19_beta =  readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_mean_path =  dir_prefix + std::string("batch_normalization_19_mean.bin"); 
-  void* batch_normalization_19_mean =  readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_19_variance_path =  dir_prefix + std::string("batch_normalization_19_variance.bin"); 
-  void* batch_normalization_19_variance =  readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_10_w_path =  dir_prefix + std::string("depthwise_conv2d_10_w.bin"); 
-  void* depthwise_conv2d_10_w =  readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_20_gamma_path =  dir_prefix + std::string("batch_normalization_20_gamma.bin"); 
-  void* batch_normalization_20_gamma =  readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_beta_path =  dir_prefix + std::string("batch_normalization_20_beta.bin"); 
-  void* batch_normalization_20_beta =  readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_mean_path =  dir_prefix + std::string("batch_normalization_20_mean.bin"); 
-  void* batch_normalization_20_mean =  readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_20_variance_path =  dir_prefix + std::string("batch_normalization_20_variance.bin"); 
-  void* batch_normalization_20_variance =  readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_21_gamma_path =  dir_prefix + std::string("batch_normalization_21_gamma.bin"); 
-  void* batch_normalization_21_gamma =  readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_beta_path =  dir_prefix + std::string("batch_normalization_21_beta.bin"); 
-  void* batch_normalization_21_beta =  readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_mean_path =  dir_prefix + std::string("batch_normalization_21_mean.bin"); 
-  void* batch_normalization_21_mean =  readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_21_variance_path =  dir_prefix + std::string("batch_normalization_21_variance.bin"); 
-  void* batch_normalization_21_variance =  readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_11_w_path =  dir_prefix + std::string("depthwise_conv2d_11_w.bin"); 
-  void* depthwise_conv2d_11_w =  readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_22_gamma_path =  dir_prefix + std::string("batch_normalization_22_gamma.bin"); 
-  void* batch_normalization_22_gamma =  readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_beta_path =  dir_prefix + std::string("batch_normalization_22_beta.bin"); 
-  void* batch_normalization_22_beta =  readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_mean_path =  dir_prefix + std::string("batch_normalization_22_mean.bin"); 
-  void* batch_normalization_22_mean =  readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_22_variance_path =  dir_prefix + std::string("batch_normalization_22_variance.bin"); 
-  void* batch_normalization_22_variance =  readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); 
-  std::string batch_normalization_23_gamma_path =  dir_prefix + std::string("batch_normalization_23_gamma.bin"); 
-  void* batch_normalization_23_gamma =  readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_beta_path =  dir_prefix + std::string("batch_normalization_23_beta.bin"); 
-  void* batch_normalization_23_beta =  readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_mean_path =  dir_prefix + std::string("batch_normalization_23_mean.bin"); 
-  void* batch_normalization_23_mean =  readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_23_variance_path =  dir_prefix + std::string("batch_normalization_23_variance.bin"); 
-  void* batch_normalization_23_variance =  readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); 
-  std::string depthwise_conv2d_12_w_path =  dir_prefix + std::string("depthwise_conv2d_12_w.bin"); 
-  void* depthwise_conv2d_12_w =  readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); 
-  std::string batch_normalization_24_gamma_path =  dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
-  void* batch_normalization_24_gamma =  readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_beta_path =  dir_prefix + std::string("batch_normalization_24_beta.bin"); 
-  void* batch_normalization_24_beta =  readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_mean_path =  dir_prefix + std::string("batch_normalization_24_mean.bin"); 
-  void* batch_normalization_24_mean =  readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_24_variance_path =  dir_prefix + std::string("batch_normalization_24_variance.bin"); 
-  void* batch_normalization_24_variance =  readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); 
-  std::string batch_normalization_25_gamma_path =  dir_prefix + std::string("batch_normalization_25_gamma.bin"); 
-  void* batch_normalization_25_gamma =  readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_beta_path =  dir_prefix + std::string("batch_normalization_25_beta.bin"); 
-  void* batch_normalization_25_beta =  readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_mean_path =  dir_prefix + std::string("batch_normalization_25_mean.bin"); 
-  void* batch_normalization_25_mean =  readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_25_variance_path =  dir_prefix + std::string("batch_normalization_25_variance.bin"); 
-  void* batch_normalization_25_variance =  readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string depthwise_conv2d_13_w_path =  dir_prefix + std::string("depthwise_conv2d_13_w.bin"); 
-  void* depthwise_conv2d_13_w =  readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); 
-  std::string batch_normalization_26_gamma_path =  dir_prefix + std::string("batch_normalization_26_gamma.bin"); 
-  void* batch_normalization_26_gamma =  readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_beta_path =  dir_prefix + std::string("batch_normalization_26_beta.bin"); 
-  void* batch_normalization_26_beta =  readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_mean_path =  dir_prefix + std::string("batch_normalization_26_mean.bin"); 
-  void* batch_normalization_26_mean =  readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_26_variance_path =  dir_prefix + std::string("batch_normalization_26_variance.bin"); 
-  void* batch_normalization_26_variance =  readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); 
-  std::string batch_normalization_27_gamma_path =  dir_prefix + std::string("batch_normalization_27_gamma.bin"); 
-  void* batch_normalization_27_gamma =  readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_beta_path =  dir_prefix + std::string("batch_normalization_27_beta.bin"); 
-  void* batch_normalization_27_beta =  readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_mean_path =  dir_prefix + std::string("batch_normalization_27_mean.bin"); 
-  void* batch_normalization_27_mean =  readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); 
-  std::string batch_normalization_27_variance_path =  dir_prefix + std::string("batch_normalization_27_variance.bin"); 
-  void* batch_normalization_27_variance =  readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 3000; 
-  int batch_size = 1000; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_5 = tensorRelu(var_4); 
-    void* var_6 = tensorConvolution(var_5, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_8 = tensorRelu(var_7); 
-    void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-    void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_11 = tensorRelu(var_10); 
-    void* var_12 = tensorConvolution(var_11, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-    void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_17 = tensorRelu(var_16); 
-    void* var_18 = tensorConvolution(var_17, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_20 = tensorRelu(var_19); 
-    void* var_22 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-    void* var_23 = tensorBatchNorm(var_22, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_24 = tensorRelu(var_23); 
-    void* var_25 = tensorConvolution(var_24, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_26 = tensorBatchNorm(var_25, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_27 = tensorRelu(var_26); 
-    void* var_28 = tensorConvolution(var_27, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-    void* var_29 = tensorBatchNorm(var_28, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_31 = tensorConvolution(var_30, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_32 = tensorBatchNorm(var_31, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_33 = tensorRelu(var_32); 
-    void* var_35 = tensorConvolution(var_33, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-    void* var_36 = tensorBatchNorm(var_35, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_37 = tensorRelu(var_36); 
-    void* var_38 = tensorConvolution(var_37, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-    void* var_39 = tensorBatchNorm(var_38, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_40 = tensorRelu(var_39); 
-    void* var_41 = tensorConvolution(var_40, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); 
-    void* var_42 = tensorBatchNorm(var_41, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_44 = tensorConvolution(var_43, conv2d_8_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorBatchNorm(var_44, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_47 = tensorConvolution(var_46, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); 
-    void* var_48 = tensorBatchNorm(var_47, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); 
-    void* var_49 = tensorRelu(var_48); 
-    void* var_50 = tensorConvolution(var_49, conv2d_9_w, 0, 0, 1, 1, 1, 1); 
-    void* var_51 = tensorBatchNorm(var_50, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); 
-    void* var_52 = tensorRelu(var_51); 
-    void* var_54 = tensorConvolution(var_52, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); 
-    void* var_55 = tensorBatchNorm(var_54, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_57 = tensorConvolution(var_56, conv2d_10_w, 0, 0, 1, 1, 1, 1); 
-    void* var_58 = tensorBatchNorm(var_57, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); 
-    void* var_59 = tensorRelu(var_58); 
-    void* var_60 = tensorConvolution(var_59, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); 
-    void* var_61 = tensorBatchNorm(var_60, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); 
-    void* var_62 = tensorRelu(var_61); 
-    void* var_63 = tensorConvolution(var_62, conv2d_11_w, 0, 0, 1, 1, 1, 1); 
-    void* var_64 = tensorBatchNorm(var_63, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); 
-    void* var_65 = tensorRelu(var_64); 
-    void* var_66 = tensorConvolution(var_65, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); 
-    void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); 
-    void* var_68 = tensorRelu(var_67); 
-    void* var_69 = tensorConvolution(var_68, conv2d_12_w, 0, 0, 1, 1, 1, 1); 
-    void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); 
-    void* var_71 = tensorRelu(var_70); 
-    void* var_73 = tensorConvolution(var_71, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); 
-    void* var_74 = tensorBatchNorm(var_73, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); 
-    void* var_75 = tensorRelu(var_74); 
-    void* var_76 = tensorConvolution(var_75, conv2d_13_w, 0, 0, 1, 1, 1, 1); 
-    void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); 
-    void* var_78 = tensorRelu(var_77); 
-    void* var_79 = tensorConvolution(var_78, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); 
-    void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); 
-    void* var_81 = tensorRelu(var_80); 
-    void* var_82 = tensorConvolution(var_81, conv2d_14_w, 0, 0, 1, 1, 1, 1); 
-    void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); 
-    void* var_84 = tensorRelu(var_83); 
-    void* var_86 = tensorPooling(var_84,1,2,2,0,0,2,2); 
-    void* var_88 = tensorGemmGPU(var_86, dense_1_w); 
-    void* var_89 = tensorAdd(var_88, dense_1_b); 
-    void* var_90 = tensorSoftmax(var_89); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_90); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc
deleted file mode 100644
index ee2c51b2399505d3a98b54920d9700dbd0548b86..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc
+++ /dev/null
@@ -1,231 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  std::string dir_prefix = std::string("../model_params/mobilenet_shallow2/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); 
-  std::string batch_normalization_1_gamma_path =  dir_prefix + std::string("batch_normalization_1_gamma.bin"); 
-  void* batch_normalization_1_gamma =  readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_beta_path =  dir_prefix + std::string("batch_normalization_1_beta.bin"); 
-  void* batch_normalization_1_beta =  readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_mean_path =  dir_prefix + std::string("batch_normalization_1_mean.bin"); 
-  void* batch_normalization_1_mean =  readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_1_variance_path =  dir_prefix + std::string("batch_normalization_1_variance.bin"); 
-  void* batch_normalization_1_variance =  readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); 
-  std::string depthwise_conv2d_1_w_path =  dir_prefix + std::string("depthwise_conv2d_1_w.bin"); 
-  void* depthwise_conv2d_1_w =  readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); 
-  std::string batch_normalization_2_gamma_path =  dir_prefix + std::string("batch_normalization_2_gamma.bin"); 
-  void* batch_normalization_2_gamma =  readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_beta_path =  dir_prefix + std::string("batch_normalization_2_beta.bin"); 
-  void* batch_normalization_2_beta =  readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_mean_path =  dir_prefix + std::string("batch_normalization_2_mean.bin"); 
-  void* batch_normalization_2_mean =  readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); 
-  std::string batch_normalization_2_variance_path =  dir_prefix + std::string("batch_normalization_2_variance.bin"); 
-  void* batch_normalization_2_variance =  readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); 
-  std::string batch_normalization_3_gamma_path =  dir_prefix + std::string("batch_normalization_3_gamma.bin"); 
-  void* batch_normalization_3_gamma =  readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_beta_path =  dir_prefix + std::string("batch_normalization_3_beta.bin"); 
-  void* batch_normalization_3_beta =  readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_mean_path =  dir_prefix + std::string("batch_normalization_3_mean.bin"); 
-  void* batch_normalization_3_mean =  readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_3_variance_path =  dir_prefix + std::string("batch_normalization_3_variance.bin"); 
-  void* batch_normalization_3_variance =  readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); 
-  std::string depthwise_conv2d_2_w_path =  dir_prefix + std::string("depthwise_conv2d_2_w.bin"); 
-  void* depthwise_conv2d_2_w =  readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); 
-  std::string batch_normalization_4_gamma_path =  dir_prefix + std::string("batch_normalization_4_gamma.bin"); 
-  void* batch_normalization_4_gamma =  readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_beta_path =  dir_prefix + std::string("batch_normalization_4_beta.bin"); 
-  void* batch_normalization_4_beta =  readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_mean_path =  dir_prefix + std::string("batch_normalization_4_mean.bin"); 
-  void* batch_normalization_4_mean =  readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); 
-  std::string batch_normalization_4_variance_path =  dir_prefix + std::string("batch_normalization_4_variance.bin"); 
-  void* batch_normalization_4_variance =  readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); 
-  std::string batch_normalization_5_gamma_path =  dir_prefix + std::string("batch_normalization_5_gamma.bin"); 
-  void* batch_normalization_5_gamma =  readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_beta_path =  dir_prefix + std::string("batch_normalization_5_beta.bin"); 
-  void* batch_normalization_5_beta =  readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_mean_path =  dir_prefix + std::string("batch_normalization_5_mean.bin"); 
-  void* batch_normalization_5_mean =  readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_5_variance_path =  dir_prefix + std::string("batch_normalization_5_variance.bin"); 
-  void* batch_normalization_5_variance =  readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_3_w_path =  dir_prefix + std::string("depthwise_conv2d_3_w.bin"); 
-  void* depthwise_conv2d_3_w =  readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_6_gamma_path =  dir_prefix + std::string("batch_normalization_6_gamma.bin"); 
-  void* batch_normalization_6_gamma =  readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_beta_path =  dir_prefix + std::string("batch_normalization_6_beta.bin"); 
-  void* batch_normalization_6_beta =  readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_mean_path =  dir_prefix + std::string("batch_normalization_6_mean.bin"); 
-  void* batch_normalization_6_mean =  readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_6_variance_path =  dir_prefix + std::string("batch_normalization_6_variance.bin"); 
-  void* batch_normalization_6_variance =  readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); 
-  std::string batch_normalization_7_gamma_path =  dir_prefix + std::string("batch_normalization_7_gamma.bin"); 
-  void* batch_normalization_7_gamma =  readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_beta_path =  dir_prefix + std::string("batch_normalization_7_beta.bin"); 
-  void* batch_normalization_7_beta =  readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_mean_path =  dir_prefix + std::string("batch_normalization_7_mean.bin"); 
-  void* batch_normalization_7_mean =  readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_7_variance_path =  dir_prefix + std::string("batch_normalization_7_variance.bin"); 
-  void* batch_normalization_7_variance =  readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); 
-  std::string depthwise_conv2d_4_w_path =  dir_prefix + std::string("depthwise_conv2d_4_w.bin"); 
-  void* depthwise_conv2d_4_w =  readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); 
-  std::string batch_normalization_8_gamma_path =  dir_prefix + std::string("batch_normalization_8_gamma.bin"); 
-  void* batch_normalization_8_gamma =  readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_beta_path =  dir_prefix + std::string("batch_normalization_8_beta.bin"); 
-  void* batch_normalization_8_beta =  readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_mean_path =  dir_prefix + std::string("batch_normalization_8_mean.bin"); 
-  void* batch_normalization_8_mean =  readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); 
-  std::string batch_normalization_8_variance_path =  dir_prefix + std::string("batch_normalization_8_variance.bin"); 
-  void* batch_normalization_8_variance =  readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); 
-  std::string batch_normalization_9_gamma_path =  dir_prefix + std::string("batch_normalization_9_gamma.bin"); 
-  void* batch_normalization_9_gamma =  readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_beta_path =  dir_prefix + std::string("batch_normalization_9_beta.bin"); 
-  void* batch_normalization_9_beta =  readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_mean_path =  dir_prefix + std::string("batch_normalization_9_mean.bin"); 
-  void* batch_normalization_9_mean =  readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_9_variance_path =  dir_prefix + std::string("batch_normalization_9_variance.bin"); 
-  void* batch_normalization_9_variance =  readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_5_w_path =  dir_prefix + std::string("depthwise_conv2d_5_w.bin"); 
-  void* depthwise_conv2d_5_w =  readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_10_gamma_path =  dir_prefix + std::string("batch_normalization_10_gamma.bin"); 
-  void* batch_normalization_10_gamma =  readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_beta_path =  dir_prefix + std::string("batch_normalization_10_beta.bin"); 
-  void* batch_normalization_10_beta =  readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_mean_path =  dir_prefix + std::string("batch_normalization_10_mean.bin"); 
-  void* batch_normalization_10_mean =  readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_10_variance_path =  dir_prefix + std::string("batch_normalization_10_variance.bin"); 
-  void* batch_normalization_10_variance =  readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); 
-  std::string batch_normalization_11_gamma_path =  dir_prefix + std::string("batch_normalization_11_gamma.bin"); 
-  void* batch_normalization_11_gamma =  readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_beta_path =  dir_prefix + std::string("batch_normalization_11_beta.bin"); 
-  void* batch_normalization_11_beta =  readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_mean_path =  dir_prefix + std::string("batch_normalization_11_mean.bin"); 
-  void* batch_normalization_11_mean =  readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_11_variance_path =  dir_prefix + std::string("batch_normalization_11_variance.bin"); 
-  void* batch_normalization_11_variance =  readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); 
-  std::string depthwise_conv2d_6_w_path =  dir_prefix + std::string("depthwise_conv2d_6_w.bin"); 
-  void* depthwise_conv2d_6_w =  readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); 
-  std::string batch_normalization_12_gamma_path =  dir_prefix + std::string("batch_normalization_12_gamma.bin"); 
-  void* batch_normalization_12_gamma =  readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_beta_path =  dir_prefix + std::string("batch_normalization_12_beta.bin"); 
-  void* batch_normalization_12_beta =  readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_mean_path =  dir_prefix + std::string("batch_normalization_12_mean.bin"); 
-  void* batch_normalization_12_mean =  readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); 
-  std::string batch_normalization_12_variance_path =  dir_prefix + std::string("batch_normalization_12_variance.bin"); 
-  void* batch_normalization_12_variance =  readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); 
-  std::string batch_normalization_13_gamma_path =  dir_prefix + std::string("batch_normalization_13_gamma.bin"); 
-  void* batch_normalization_13_gamma =  readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_beta_path =  dir_prefix + std::string("batch_normalization_13_beta.bin"); 
-  void* batch_normalization_13_beta =  readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_mean_path =  dir_prefix + std::string("batch_normalization_13_mean.bin"); 
-  void* batch_normalization_13_mean =  readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); 
-  std::string batch_normalization_13_variance_path =  dir_prefix + std::string("batch_normalization_13_variance.bin"); 
-  void* batch_normalization_13_variance =  readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 2500; 
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size; 
-    int end = (i + 1) * batch_size; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); 
-    void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); 
-    void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); 
-    void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); 
-    void* var_9 = tensorRelu(var_8); 
-    void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); 
-    void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); 
-    void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); 
-    void* var_16 = tensorRelu(var_15); 
-    void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); 
-    void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); 
-    void* var_20 = tensorRelu(var_19); 
-    void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); 
-    void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); 
-    void* var_23 = tensorRelu(var_22); 
-    void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); 
-    void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); 
-    void* var_28 = tensorRelu(var_27); 
-    void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); 
-    void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); 
-    void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); 
-    void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); 
-    void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); 
-    void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); 
-    void* var_49 = tensorGemmGPU(var_47, dense_1_w); 
-    void* var_50 = tensorAdd(var_49, dense_1_b); 
-    void* var_51 = tensorSoftmax(var_50); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_51); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc
deleted file mode 100644
index 11cc3a38dd5da9dfcee7dd1181ab7e9a099fef88..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-  
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 10000;
-  int batch_size = 2000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-    
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    
-    void* var_2 = tensorConvPerf(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-    void* var_4 = tensorRelu(var_3); 
-    void* var_6 = tensorConvPerf(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_7 = tensorAdd(var_6, conv2d_2_b); 
-    void* var_8 = tensorRelu(var_7); 
-    void* var_10 = tensorConvPerf(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-    void* var_12 = tensorAdd(var_4, var_11); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_15 = tensorConvPerf(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-    void* var_17 = tensorRelu(var_16); 
-    void* var_19 = tensorConvPerf(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_20 = tensorAdd(var_19, conv2d_5_b); 
-    void* var_21 = tensorAdd(var_13, var_20); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvPerf(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_25 = tensorAdd(var_24, conv2d_6_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_28 = tensorConvPerf(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_29 = tensorAdd(var_28, conv2d_7_b); 
-    void* var_30 = tensorAdd(var_22, var_29); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvPerf(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 0, 0); 
-    void* var_34 = tensorAdd(var_33, conv2d_8_b); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_37 = tensorConvPerf(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_38 = tensorAdd(var_37, conv2d_9_b); 
-    void* var_40 = tensorConvPerf(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 0, 0); 
-    void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-    void* var_42 = tensorAdd(var_41, var_38); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_45 = tensorConvPerf(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_46 = tensorAdd(var_45, conv2d_11_b); 
-    void* var_47 = tensorRelu(var_46); 
-    void* var_49 = tensorConvPerf(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-    void* var_51 = tensorAdd(var_43, var_50); 
-    void* var_52 = tensorRelu(var_51); 
-    void* var_54 = tensorConvPerf(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 1, 0); 
-    void* var_55 = tensorAdd(var_54, conv2d_13_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorConvPerf(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 0, 1); 
-    void* var_59 = tensorAdd(var_58, conv2d_14_b); 
-    void* var_60 = tensorAdd(var_52, var_59); 
-    void* var_61 = tensorRelu(var_60); 
-    void* var_63 = tensorConvPerf(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 0, 0); 
-    void* var_64 = tensorAdd(var_63, conv2d_15_b); 
-    void* var_65 = tensorRelu(var_64); 
-    void* var_67 = tensorConvPerf(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_68 = tensorAdd(var_67, conv2d_16_b); 
-    void* var_70 = tensorConvPerf(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 0, 0); 
-    void* var_71 = tensorAdd(var_70, conv2d_17_b); 
-    void* var_72 = tensorAdd(var_71, var_68); 
-    void* var_73 = tensorRelu(var_72); 
-    void* var_75 = tensorConvPerf(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_76 = tensorAdd(var_75, conv2d_18_b); 
-    void* var_77 = tensorRelu(var_76); 
-    void* var_79 = tensorConvPerf(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_80 = tensorAdd(var_79, conv2d_19_b); 
-    void* var_81 = tensorAdd(var_73, var_80); 
-    void* var_82 = tensorRelu(var_81); 
-    void* var_84 = tensorConvPerf(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_85 = tensorAdd(var_84, conv2d_20_b); 
-    void* var_86 = tensorRelu(var_85); 
-    void* var_88 = tensorConvPerf(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 0, 0); 
-    void* var_89 = tensorAdd(var_88, conv2d_21_b); 
-    void* var_90 = tensorAdd(var_82, var_89); 
-    void* var_91 = tensorRelu(var_90); 
-    void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); 
-    void* var_94 = tensorGemmGPU(var_92, dense_1_w); 
-    void* var_95 = tensorAdd(var_94, dense_1_b); 
-    void* var_96 = tensorSoftmax(var_95); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-    final_accuracy += accuracy;
-    
-    freeBatchMemory();
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc
deleted file mode 100644
index 2e33715e8c6972966e7359a1e7b8fc5069e1f16f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-  
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 10000;
-  int batch_size = 2000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-    
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    
-    void* var_2 = tensorConvPerfCuda(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-    void* var_4 = tensorRelu(var_3); 
-    void* var_6 = tensorConvPerfCuda(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_7 = tensorAdd(var_6, conv2d_2_b); 
-    void* var_8 = tensorRelu(var_7); 
-    void* var_10 = tensorConvPerfCuda(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-    void* var_12 = tensorAdd(var_4, var_11); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_15 = tensorConvPerfCuda(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-    void* var_17 = tensorRelu(var_16); 
-    void* var_19 = tensorConvPerfCuda(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_20 = tensorAdd(var_19, conv2d_5_b); 
-    void* var_21 = tensorAdd(var_13, var_20); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvPerfCuda(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 3, 1, 2); 
-    void* var_25 = tensorAdd(var_24, conv2d_6_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_28 = tensorConvPerfCuda(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_29 = tensorAdd(var_28, conv2d_7_b); 
-    void* var_30 = tensorAdd(var_22, var_29); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvPerfCuda(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 1, 1, 0); 
-    void* var_34 = tensorAdd(var_33, conv2d_8_b); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_37 = tensorConvPerfCuda(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_38 = tensorAdd(var_37, conv2d_9_b); 
-    void* var_40 = tensorConvPerfCuda(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 1, 1, 0); 
-    void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-    void* var_42 = tensorAdd(var_41, var_38); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_45 = tensorConvPerfCuda(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 3, 1, 0); 
-    void* var_46 = tensorAdd(var_45, conv2d_11_b); 
-    void* var_47 = tensorRelu(var_46); 
-    void* var_49 = tensorConvPerfCuda(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-    void* var_51 = tensorAdd(var_43, var_50); 
-    void* var_52 = tensorRelu(var_51); 
-    void* var_54 = tensorConvPerfCuda(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_55 = tensorAdd(var_54, conv2d_13_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorConvPerfCuda(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 1, 3, 1); 
-    void* var_59 = tensorAdd(var_58, conv2d_14_b); 
-    void* var_60 = tensorAdd(var_52, var_59); 
-    void* var_61 = tensorRelu(var_60); 
-    void* var_63 = tensorConvPerfCuda(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 1, 1, 0); 
-    void* var_64 = tensorAdd(var_63, conv2d_15_b); 
-    void* var_65 = tensorRelu(var_64); 
-    void* var_67 = tensorConvPerfCuda(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_68 = tensorAdd(var_67, conv2d_16_b); 
-    void* var_70 = tensorConvPerfCuda(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 3, 1, 2); 
-    void* var_71 = tensorAdd(var_70, conv2d_17_b); 
-    void* var_72 = tensorAdd(var_71, var_68); 
-    void* var_73 = tensorRelu(var_72); 
-    void* var_75 = tensorConvPerfCuda(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_76 = tensorAdd(var_75, conv2d_18_b); 
-    void* var_77 = tensorRelu(var_76); 
-    void* var_79 = tensorConvPerfCuda(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 1, 3, 0); 
-    void* var_80 = tensorAdd(var_79, conv2d_19_b); 
-    void* var_81 = tensorAdd(var_73, var_80); 
-    void* var_82 = tensorRelu(var_81); 
-    void* var_84 = tensorConvPerfCuda(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_85 = tensorAdd(var_84, conv2d_20_b); 
-    void* var_86 = tensorRelu(var_85); 
-    void* var_88 = tensorConvPerfCuda(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); 
-    void* var_89 = tensorAdd(var_88, conv2d_21_b); 
-    void* var_90 = tensorAdd(var_82, var_89); 
-    void* var_91 = tensorRelu(var_90); 
-    void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); 
-    void* var_94 = tensorGemmGPU(var_92, dense_1_w); 
-    void* var_95 = tensorAdd(var_94, dense_1_b); 
-    void* var_96 = tensorSoftmax(var_95); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-    final_accuracy += accuracy;
-    
-    freeBatchMemory();
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc
deleted file mode 100644
index f7c6593d525351085ee99606bc90fc1419980d8e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc
+++ /dev/null
@@ -1,194 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(1); 
-
-  int batch_size = 4000;
- 
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  void* var_2 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-  void* var_3 = tensorHalfAdd(var_2, conv2d_1_b); 
-  void* var_4 = tensorHalfRelu(var_3); 
-  void* var_6 = tensorHalfConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-  void* var_7 = tensorHalfAdd(var_6, conv2d_2_b); 
-  void* var_8 = tensorHalfRelu(var_7); 
-  void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-  void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); 
-  void* var_12 = tensorHalfAdd(var_4, var_11); 
-  void* var_13 = tensorHalfRelu(var_12); 
-  void* var_15 = tensorHalfConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-  void* var_16 = tensorHalfAdd(var_15, conv2d_4_b); 
-  void* var_17 = tensorHalfRelu(var_16); 
-  void* var_19 = tensorHalfConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-  void* var_20 = tensorHalfAdd(var_19, conv2d_5_b); 
-  void* var_21 = tensorHalfAdd(var_13, var_20); 
-  void* var_22 = tensorHalfRelu(var_21); 
-  void* var_24 = tensorHalfConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-  void* var_25 = tensorHalfAdd(var_24, conv2d_6_b); 
-  void* var_26 = tensorHalfRelu(var_25); 
-  void* var_28 = tensorHalfConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-  void* var_29 = tensorHalfAdd(var_28, conv2d_7_b); 
-  void* var_30 = tensorHalfAdd(var_22, var_29); 
-  void* var_31 = tensorHalfRelu(var_30); 
-  void* var_33 = tensorHalfConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); 
-  void* var_34 = tensorHalfAdd(var_33, conv2d_8_b); 
-  void* var_35 = tensorHalfRelu(var_34); 
-  void* var_37 = tensorHalfConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-  void* var_38 = tensorHalfAdd(var_37, conv2d_9_b); 
-  void* var_40 = tensorHalfConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); 
-  void* var_41 = tensorHalfAdd(var_40, conv2d_10_b); 
-  void* var_42 = tensorHalfAdd(var_41, var_38); 
-  void* var_43 = tensorHalfRelu(var_42); 
-  void* var_45 = tensorHalfConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-  void* var_46 = tensorHalfAdd(var_45, conv2d_11_b); 
-  void* var_47 = tensorHalfRelu(var_46); 
-  void* var_49 = tensorHalfConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-  void* var_50 = tensorHalfAdd(var_49, conv2d_12_b); 
-  void* var_51 = tensorHalfAdd(var_43, var_50); 
-  void* var_52 = tensorHalfRelu(var_51); 
-  void* var_54 = tensorHalfConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-  void* var_55 = tensorHalfAdd(var_54, conv2d_13_b); 
-  void* var_56 = tensorHalfRelu(var_55); 
-  void* var_58 = tensorHalfConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); 
-  void* var_59 = tensorHalfAdd(var_58, conv2d_14_b); 
-  void* var_60 = tensorHalfAdd(var_52, var_59); 
-  void* var_61 = tensorHalfRelu(var_60); 
-  void* var_63 = tensorHalfConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); 
-  void* var_64 = tensorHalfAdd(var_63, conv2d_15_b); 
-  void* var_65 = tensorHalfRelu(var_64); 
-  void* var_67 = tensorHalfConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); 
-  void* var_68 = tensorHalfAdd(var_67, conv2d_16_b); 
-  void* var_70 = tensorHalfConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
-  void* var_71 = tensorHalfAdd(var_70, conv2d_17_b); 
-  void* var_72 = tensorHalfAdd(var_71, var_68); 
-  void* var_73 = tensorHalfRelu(var_72); 
-  void* var_75 = tensorHalfConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); 
-  void* var_76 = tensorHalfAdd(var_75, conv2d_18_b); 
-  void* var_77 = tensorHalfRelu(var_76); 
-  void* var_79 = tensorHalfConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); 
-  void* var_80 = tensorHalfAdd(var_79, conv2d_19_b); 
-  void* var_81 = tensorHalfAdd(var_73, var_80); 
-  void* var_82 = tensorHalfRelu(var_81); 
-  void* var_84 = tensorHalfConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); 
-  void* var_85 = tensorHalfAdd(var_84, conv2d_20_b); 
-  void* var_86 = tensorHalfRelu(var_85); 
-  void* var_88 = tensorHalfConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); 
-  void* var_89 = tensorHalfAdd(var_88, conv2d_21_b); 
-  void* var_90 = tensorHalfAdd(var_82, var_89); 
-  void* var_91 = tensorHalfRelu(var_90); 
-  void* var_92 = tensorHalfPooling(var_91,1,8,8,0,0,8,8); 
-  void* var_94 = tensorHalfGemmGPU(var_92, dense_1_w); 
-  void* var_95 = tensorHalfAdd(var_94, dense_1_b); 
-  void* var_96 = tensorSoftmax(var_95); 
-
-  computeAccuracy2(labels, batch_size,var_96); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc
deleted file mode 100644
index 6634ce92c9aed0fbcc32e68580fb3171145ee297..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-  
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 10000;
-  int batch_size = 2000;
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-  for(int i = 0; i < batch_count; i++){
-
-    int start = i * batch_size;
-    int end = (i + 1) * batch_size;
-    
-    void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    
-    void* var_2 = tensorConvolutionKernelSamp(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-    void* var_4 = tensorRelu(var_3); 
-    void* var_6 = tensorConvolutionKernelSamp(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_7 = tensorAdd(var_6, conv2d_2_b); 
-    void* var_8 = tensorRelu(var_7); 
-    void* var_10 = tensorConvolutionKernelSamp(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-    void* var_12 = tensorAdd(var_4, var_11); 
-    void* var_13 = tensorRelu(var_12); 
-    void* var_15 = tensorConvolutionKernelSamp(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-    void* var_17 = tensorRelu(var_16); 
-    void* var_19 = tensorConvolutionKernelSamp(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_20 = tensorAdd(var_19, conv2d_5_b); 
-    void* var_21 = tensorAdd(var_13, var_20); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvolutionKernelSamp(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 4); 
-    void* var_25 = tensorAdd(var_24, conv2d_6_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_28 = tensorConvolutionKernelSamp(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 4); 
-    void* var_29 = tensorAdd(var_28, conv2d_7_b); 
-    void* var_30 = tensorAdd(var_22, var_29); 
-    void* var_31 = tensorRelu(var_30); 
-    void* var_33 = tensorConvolutionKernelSamp(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 30); 
-    void* var_34 = tensorAdd(var_33, conv2d_8_b); 
-    void* var_35 = tensorRelu(var_34); 
-    void* var_37 = tensorConvolutionKernelSamp(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_38 = tensorAdd(var_37, conv2d_9_b); 
-    void* var_40 = tensorConvolutionKernelSamp(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 30); 
-    void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-    void* var_42 = tensorAdd(var_41, var_38); 
-    void* var_43 = tensorRelu(var_42); 
-    void* var_45 = tensorConvolutionKernelSamp(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 4); 
-    void* var_46 = tensorAdd(var_45, conv2d_11_b); 
-    void* var_47 = tensorRelu(var_46); 
-    void* var_49 = tensorConvolutionKernelSamp(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-    void* var_51 = tensorAdd(var_43, var_50); 
-    void* var_52 = tensorRelu(var_51); 
-    void* var_54 = tensorConvolutionKernelSamp(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_55 = tensorAdd(var_54, conv2d_13_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorConvolutionKernelSamp(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_59 = tensorAdd(var_58, conv2d_14_b); 
-    void* var_60 = tensorAdd(var_52, var_59); 
-    void* var_61 = tensorRelu(var_60); 
-    void* var_63 = tensorConvolutionKernelSamp(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 30); 
-    void* var_64 = tensorAdd(var_63, conv2d_15_b); 
-    void* var_65 = tensorRelu(var_64); 
-    void* var_67 = tensorConvolutionKernelSamp(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_68 = tensorAdd(var_67, conv2d_16_b); 
-    void* var_70 = tensorConvolutionKernelSamp(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 30); 
-    void* var_71 = tensorAdd(var_70, conv2d_17_b); 
-    void* var_72 = tensorAdd(var_71, var_68); 
-    void* var_73 = tensorRelu(var_72); 
-    void* var_75 = tensorConvolutionKernelSamp(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_76 = tensorAdd(var_75, conv2d_18_b); 
-    void* var_77 = tensorRelu(var_76); 
-    void* var_79 = tensorConvolutionKernelSamp(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_80 = tensorAdd(var_79, conv2d_19_b); 
-    void* var_81 = tensorAdd(var_73, var_80); 
-    void* var_82 = tensorRelu(var_81); 
-    void* var_84 = tensorConvolutionKernelSamp(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_85 = tensorAdd(var_84, conv2d_20_b); 
-    void* var_86 = tensorRelu(var_85); 
-    void* var_88 = tensorConvolutionKernelSamp(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 30); 
-    void* var_89 = tensorAdd(var_88, conv2d_21_b); 
-    void* var_90 = tensorAdd(var_82, var_89); 
-    void* var_91 = tensorRelu(var_90); 
-    void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); 
-    void* var_94 = tensorGemmGPU(var_92, dense_1_w); 
-    void* var_95 = tensorAdd(var_94, dense_1_b); 
-    void* var_96 = tensorSoftmax(var_95); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-    float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-    final_accuracy += accuracy;
-    
-    freeBatchMemory();
-  }
-
-  stopProfiling();
-
-  final_accuracy = final_accuracy / batch_count;
-  dumpFinalAccuracy(final_accuracy);
-
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc
deleted file mode 100644
index a8129a1e459a15e26f595972724451e01d81b0a1..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem
-
-/*void testAlexnet2(){
-
-  struct Tensor* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 100, 3, 224, 224);
-  initTensorValues(input);
-
-  struct Tensor* conv1filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 96, 3, 11, 11);
-  printTensorDims(conv1filter);
-
-  /****** Start of Layer 1 ***************
-  
-  // NOTE: Padding for same conv is computed as P = (F - 1 /2)
-  struct Tensor* conv1out = tensorConvolution(input, conv1filter, 5, 5, 4, 4);
-  printTensorDims(conv1out);
-
-  struct Tensor* conv1bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 96, 1, 1);
-  struct Tensor* conv1bias_out = tensorAdd(conv1out, conv1bias); 
-  
-  struct Tensor* relu1out = tensorRelu(conv1bias_out);
-  
-  // NOTE: These parameters are a deviation from the original paper
-  // The parameters match the alexnet TF model
-  // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal
-  unsigned int LRN_window = 5;
-  double LRN_alpha = 2e-05;
-  double LRN_beta = 0.75;
-  double LRN_k = 1.0; 
-  struct Tensor* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn1out);
-
-  struct Tensor* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2);
-  
-  /****** End of Conv Layer 1 **********
-
-  struct Tensor** splits = tensorSplit(maxpool1out, 2, 1);
- 
-  struct Tensor* conv2W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 48, 5, 5);
-  struct Tensor** conv2fils = tensorSplit(conv2W, 2, 0);
-
-  struct Tensor* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1);
-  printTensorDims(conv2a_out);
-
-  struct Tensor* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1);
-  printTensorDims(conv2b_out);
- 
-  struct Tensor* conv2_outs[2];
-  conv2_outs[0] = conv2a_out;
-  conv2_outs[1] = conv2b_out;
-
-  struct Tensor* conv2_concat_out = tensorConcat(conv2_outs, 2, 1);
-  printTensorDims(conv2_concat_out);
-
-  struct Tensor* conv2bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1);
-  struct Tensor* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); 
-  struct Tensor* relu2out = tensorRelu(conv2bias_out);  
-  struct Tensor* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn2out);
-    
-  struct Tensor* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2);
-  printTensorDims(maxpool2out);
-
-  /******** End of Conv Layer 2 ************
-
-  struct Tensor* conv3filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 256, 3, 3);
-  struct Tensor* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1);
-  
-  struct Tensor* conv3bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1);
-  struct Tensor* conv3bias_out = tensorAdd(conv3_out, conv3bias); 
-  struct Tensor* relu3out = tensorRelu(conv3bias_out);  
-  printTensorDims(relu3out);
-
-  /********* End of Conv layer 3 ******
-
-  struct Tensor** splits2 = tensorSplit(relu3out, 2, 1);
-
-  struct Tensor* conv4W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 192, 3, 3);
-  struct Tensor** conv4fils = tensorSplit(conv4W, 2, 0);
-
-  printTensorDims(splits2[0]);
-  printTensorDims(conv4fils[0]);
-  
-  struct Tensor* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1);
-  printTensorDims(conv4a_out);
-
-  struct Tensor* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1);
-  printTensorDims(conv4b_out);
- 
-  struct Tensor* conv4_outs[2];
-  conv4_outs[0] = conv4a_out;
-  conv4_outs[1] = conv4b_out;
-
-  struct Tensor* conv4_concat_out = tensorConcat(conv4_outs, 2, 1);
-  printTensorDims(conv4_concat_out);
-
-  struct Tensor* conv4bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1);
-  struct Tensor* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); 
-  struct Tensor* relu4out = tensorRelu(conv4bias_out);  
-  printTensorDims(relu4out);
-  
-  /********* End of Conv layer 4 ******
-
-  struct Tensor** splits3 = tensorSplit(relu4out, 2, 1);
-
-  struct Tensor* conv5W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 192, 3, 3);
-  struct Tensor** conv5fils = tensorSplit(conv5W, 2, 0);
-
-  printTensorDims(splits3[0]);
-  printTensorDims(conv5fils[0]);
-  
-  struct Tensor* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1);
-  printTensorDims(conv5a_out);
-
-  struct Tensor* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1);
-  printTensorDims(conv5b_out);
- 
-  struct Tensor* conv5_outs[2];
-  conv5_outs[0] = conv5a_out;
-  conv5_outs[1] = conv5b_out;
-
-  struct Tensor* conv5_concat_out = tensorConcat(conv5_outs, 2, 1);
-  printTensorDims(conv5_concat_out);
-
-  struct Tensor* conv5bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1);
-  struct Tensor* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); 
-  struct Tensor* relu5out = tensorRelu(conv5bias_out);  
-  printTensorDims(relu5out);
-
-  struct Tensor* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2);
-  printTensorDims(maxpool5out);
-
-  /********* End of Conv layer 5 ******
-
-  struct Tensor* fc1_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-					      1, 1, 256*6*6, 4096);
-  struct Tensor* gemm1out = tensorGemm(maxpool5out, fc1_weights);  
-  printTensorDims(gemm1out);
-
-  struct Tensor* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-				       1, 1, 1, 4096);
-  struct Tensor* gemm1biasout = tensorGemmBias(gemm1out, bias);
-  printTensorDims(gemm1biasout);
-
-  struct Tensor* relu6out = tensorRelu(gemm1biasout);  
-  printTensorDims(relu6out);
-
-  /***** End of FC1 layer ********
-
-  struct Tensor* fc2_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-					      1, 1, 4096, 4096);
-  struct Tensor* gemm2out = tensorGemm(relu6out, fc2_weights);  
-  printTensorDims(gemm2out);
-
-  struct Tensor* bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-				       1, 1, 1, 4096);
-  struct Tensor* gemm2biasout = tensorGemmBias(gemm2out, bias2);
-  printTensorDims(gemm2biasout);
-
-  struct Tensor* relu7out = tensorRelu(gemm2biasout);  
-  printTensorDims(relu7out);
-
-  /***** End of FC2 layer ********
-
-  struct Tensor* fc3_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-					      1, 1, 4096, 1000);
-  struct Tensor* gemm3out = tensorGemm(relu7out, fc3_weights);  
-  printTensorDims(gemm3out);
-
-  struct Tensor* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
-				       1, 1, 1, 1000);
-  struct Tensor* gemm3biasout = tensorGemmBias(gemm3out, bias3);
-  printTensorDims(gemm3biasout);
-
-  /******** End of FC3 Layer **********
-  struct Tensor* result = tensorSoftmax(gemm3biasout);
-  printTensorDims(result);
-  
-} */
-
-
-
-void printLikelihood(char* labels_file, int num_labels, void* result_ptr){
-
-  struct Tensor* result = (struct Tensor*) result_ptr;
-  
-  size_t batch_dim = result->dims.dim_sizes[0];
-  size_t channels = result->dims.dim_sizes[1];
-  float* data = (float*) result->host_data;
-  
-  for(int i = 0; i < batch_dim; i++){
-    int chosen = 0;
-    for (int id = 1; id < channels; ++id){
-      if (data[i * channels + chosen] < data[i * channels + id]) chosen = id;
-    }
-
-    printf("** chosen = %d, label = %f, label+3 = %f \n",
-	   chosen, data[chosen], data[chosen+3]);   
-  }
-
-  //float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0;
-  //printf("****** Accuracy = %f \n\n", accuracy);  
-}
-
-
-//--- Results not matching
-// *** CHECK:
-// 1) cudnnCrossCorrelation vs cudnnConvolution
-// 2) Weights
-// 3) Tensor outputs
-// 4) Data layouts
-
-
-
-/*** NOTE: REFERECNCE ARCHITECTURE **/
-// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem
-void testAlexnet3(){
-
-  int test_batch_size = 2;
-  int conv_mode = 1; // CROSS_CORRELATION matches the TF conv2d implementation
-  int conv_precision = 0; // floating point precision for convolution
- 
-  printf("****** AlexNet Architecture 3 ******** \n\n");
-  void* input = readTrainedWeights("../alexnet/params/combined_imgs.bin",
-				   CUDNN_DATA_FLOAT, test_batch_size, 3, 227, 227);
-  dumpWeightsToFile("tensors_out/input.out", input); 
-    
-  /****** Start of Layer 1 ****************/  
-  void* conv1filter = readTrainedWeights("../alexnet/params/conv1.bin",
-					 CUDNN_DATA_FLOAT, 96, 3, 11, 11);
-
-  printTensorDims(conv1filter);
-  dumpWeightsToFile("tensors_out/conv1filter.out", conv1filter); 
-  
-  // NOTE: the trained model does NOT have any padding in this conv
-  void* conv1out = tensorConvolution(input, conv1filter, 4, 4, 4, 4,
-				     conv_mode, conv_precision);
-  printTensorDims(conv1out);
-  
-  void* conv1bias = readTrainedWeights("../alexnet/params/conv1.bias.bin",
-				       CUDNN_DATA_FLOAT, 1, 96, 1, 1);
-  void* conv1bias_out = tensorAdd(conv1out, conv1bias);
-
-  dumpWeightsToFile("tensors_out/conv1_init.out", conv1out);
-   
-  void* relu1out = tensorRelu(conv1bias_out);
-  printTensorDims(relu1out);
-  dumpWeightsToFile("tensors_out/conv1.out", relu1out);
-  
-  // NOTE: These parameters are a deviation from the original paper
-  // The parameters match the alexnet TF model
-  // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal
-  unsigned int LRN_window = 5;
-  double LRN_alpha = 2e-05 * LRN_window;
-  double LRN_beta = 0.75;
-  double LRN_k = 1.0;
-
-   // TEST-point - Compare TF vs CUDNN
-  void* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn1out);
-  dumpWeightsToFile("tensors_out/lrn1.out", lrn1out);
-    
-  void* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2);
-  printTensorDims(maxpool1out);  
-  dumpWeightsToFile("tensors_out/maxpool1.out", maxpool1out);
-  
-  /****** End of Conv Layer 1 ***********/
-  
-  // TEST-point
-  void** splits = tensorSplit(maxpool1out, 2, 1);
-
-  void* concat_test1 = tensorConcat(splits, 2, 1);
-  compareTensors(maxpool1out, concat_test1);
-  
-  void* conv2W = readTrainedWeights("../alexnet/params/conv2.bin",
-				    CUDNN_DATA_FLOAT, 256, 48, 5, 5);
-
-  dumpWeightsToFile("tensors_out/conv2filter.out", conv2W); 
-  
-  // TEST point - compare split convolution across TF vs cuDNN
-  void** conv2fils = tensorSplit(conv2W, 2, 0);
-
-  void* concat_test2 = tensorConcat(conv2fils, 2, 0);
-  compareTensors(conv2W, concat_test2);
-  
-  // NOTE: Padding for same conv is computed as P = ((F - 1) / 2)
-  void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv2a_out);
-
-  void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv2b_out);
- 
-  void* conv2_outs[2];
-  conv2_outs[0] = conv2a_out;
-  conv2_outs[1] = conv2b_out;
-
-  // Test point
-  void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1);
-  printTensorDims(conv2_concat_out);
-  dumpWeightsToFile("tensors_out/conv2_init.out", conv2_concat_out); 
-  
-  void* conv2bias = readTrainedWeights("../alexnet/params/conv2.bias.bin",
-				       CUDNN_DATA_FLOAT, 1, 256, 1, 1);  
-  void* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias);
-  printTensorDims(conv2bias_out);
-
-  dumpWeightsToFile("tensors_out/conv2_bias_init.out", conv2bias_out); 
-
-  void* relu2out = tensorRelu(conv2bias_out);
-  dumpWeightsToFile("tensors_out/conv2.out", relu2out); 
-  printTensorDims(relu2out);
- 
-  void* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k);
-  printTensorDims(lrn2out);
-    
-  void* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2);
-  printTensorDims(maxpool2out);
-  
-  /******** End of Conv Layer 2 *************/
-
-  void* conv3filter = readTrainedWeights("../alexnet/params/conv3.bin",
-					 CUDNN_DATA_FLOAT, 384, 256, 3, 3);   
-  void* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1,
-				      conv_mode, conv_precision);
-  
-  void* conv3bias = readTrainedWeights("../alexnet/params/conv3.bias.bin",
-				       CUDNN_DATA_FLOAT, 1, 384, 1, 1);
-  void* conv3bias_out = tensorAdd(conv3_out, conv3bias); 
-  void* relu3out = tensorRelu(conv3bias_out);
-  dumpWeightsToFile("tensors_out/conv3.out", relu3out);  
-  printTensorDims(relu3out);
-
-  /********* End of Conv layer 3 *******/
-
-  void** splits2 = tensorSplit(relu3out, 2, 1);
-
-  void* conv4W = readTrainedWeights("../alexnet/params/conv4.bin",
-				    CUDNN_DATA_FLOAT, 384, 192, 3, 3);   
-  void** conv4fils = tensorSplit(conv4W, 2, 0);
-
-  printTensorDims(splits2[0]);
-  printTensorDims(conv4fils[0]);
-
-  // Test-point DOES the pairing of splits and filters make sense?
-  void* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv4a_out);
-
-  void* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv4b_out);
- 
-  void* conv4_outs[2];
-  conv4_outs[0] = conv4a_out;
-  conv4_outs[1] = conv4b_out;
-
-  void* conv4_concat_out = tensorConcat(conv4_outs, 2, 1);
-  printTensorDims(conv4_concat_out);
-
-  void* conv4bias = readTrainedWeights("../alexnet/params/conv4.bias.bin",
-						  CUDNN_DATA_FLOAT, 1, 384, 1, 1);
-  void* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias);
-
-  void* relu4out = tensorRelu(conv4bias_out);  
-  printTensorDims(relu4out);
-  
-  /********* End of Conv layer 4 *******/
-
-  void** splits3 = tensorSplit(relu4out, 2, 1);
-
-  void* conv5W = readTrainedWeights("../alexnet/params/conv5.bin",
-					     CUDNN_DATA_FLOAT, 256, 192, 3, 3);  
-  void** conv5fils = tensorSplit(conv5W, 2, 0);
-
-  printTensorDims(splits3[0]);
-  printTensorDims(conv5fils[0]);
-  
-  void* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv5a_out);
-
-  void* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1,
-				       conv_mode, conv_precision);
-  printTensorDims(conv5b_out);
- 
-  void* conv5_outs[2];
-  conv5_outs[0] = conv5a_out;
-  conv5_outs[1] = conv5b_out;
-
-  void* conv5_concat_out = tensorConcat(conv5_outs, 2, 1);
-  printTensorDims(conv5_concat_out);
-
-  void* conv5bias = readTrainedWeights("../alexnet/params/conv5.bias.bin",
-				       CUDNN_DATA_FLOAT, 1, 256, 1, 1);
-  void* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); 
-  void* relu5out = tensorRelu(conv5bias_out);  
-  printTensorDims(relu5out);
-
-  void* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2);
-  printTensorDims(maxpool5out);
-
-  /********* End of Conv layer 5 *******/
-
-  // Test-point: I suspect the data may not be layed out correct (either in file or after loading)
-  void* fc1_weights = readTrainedWeights("../alexnet/params/fc1.bin",
-					 CUDNN_DATA_FLOAT, 1, 1, 256*6*6, 4096);
-  void* gemm1out = tensorGemm(maxpool5out, fc1_weights);  
-  printTensorDims(gemm1out);
-
-  void* bias = readTrainedWeights("../alexnet/params/fc1.bias.bin",
-		                           CUDNN_DATA_FLOAT, 1, 1, 1, 4096);
-    
-  void* gemm1biasout = tensorGemmBias(gemm1out, bias);
-  printTensorDims(gemm1biasout);
-
-  void* relu6out = tensorRelu(gemm1biasout);  
-  printTensorDims(relu6out);
-
-  /***** End of FC1 layer *********/
-
-  void* fc2_weights = readTrainedWeights("../alexnet/params/fc2.bin",
-						  CUDNN_DATA_FLOAT, 1, 1, 4096, 4096);
-  void* gemm2out = tensorGemm(relu6out, fc2_weights);  
-  printTensorDims(gemm2out);
-
-  void* bias2 = readTrainedWeights("../alexnet/params/fc2.bias.bin",
-					    CUDNN_DATA_FLOAT, 1, 1, 1, 4096);
-  void* gemm2biasout = tensorGemmBias(gemm2out, bias2);
-  printTensorDims(gemm2biasout);
-
-  void* relu7out = tensorRelu(gemm2biasout);  
-  printTensorDims(relu7out);
-
-  /***** End of FC2 layer *********/
-
-  void* fc3_weights = readTrainedWeights("../alexnet/params/fc3.bin",
-						  CUDNN_DATA_FLOAT, 1, 1, 4096, 1000);  
-  void* gemm3out = tensorGemm(relu7out, fc3_weights);  
-  printTensorDims(gemm3out);
-
-  void* bias3 = readTrainedWeights("../alexnet/params/fc3.bias.bin",
-				            CUDNN_DATA_FLOAT, 1, 1, 1, 1000);
-  void* gemm3biasout = tensorGemmBias(gemm3out, bias3);
-  printTensorDims(gemm3biasout);
-
-  /******** End of FC3 Layer ***********/
-  void* result = tensorSoftmax(gemm3biasout);
-  printTensorDims(result);
-
-  // FIXIT: Pass file with the labels
-  printLikelihood("", test_batch_size, result);
-  // THINK: I believe that comparing the results do not need to be part of the HPVM graph
-  printf("END of Alexnet3 -- \n");  
-}
-
-
-
-
-
-int main(){
-
-  // IMP-NOTE: Always initialize the runtime
-  initializeRuntime(0);
-
-  //testAlexnet1();
-  //testAlexnet2();
-  testAlexnet3(); 
-
-  return 0;
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc
deleted file mode 100644
index c0fee9b659db9ff45f56b75b989fbbed68523d43..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testFC_half(){
-
-  printf("********* Fully Connected DNN-1 ********* \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000; 
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-  					    float_type, test_batch_size, 1, 28, 28);  
-
-  void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin",
-						  float_type, 1, 1, 784, 128);  
-  void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
-					       float_type, 1, 128, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin",
-						  float_type, 1, 1, 128, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
-					       float_type, 1, 10, 1, 1);  
-
-  // Start execution profiling Tensor ops
-  startProfiling();
-  
-  // Layer-1
-  void* fc1out = tensorHgemm(input, fc1_weights);  
-  printTensorDims(fc1out);
-  
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  printTensorDims(fc1_bias_out);
-
-  void* fc1_relu = tensorRelu(fc1_bias_out);
-  printTensorDims(fc1_relu);
-  
-  // Layer-2
-  void* fc2out = tensorHgemm(fc1_relu, fc2_weights);  
-  printTensorDims(fc2out);
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  printTensorDims(fc2_bias_out);
-
-  void* fc2_relu = tensorRelu(fc2_bias_out);
-  printTensorDims(fc2_relu);
-
-  void* result = tensorSoftmax(fc2_relu);
-  printTensorDims(result);
-
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
-		  test_batch_size, result);
-}
-
-
-
-int main(){
-
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  testFC_half();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc
deleted file mode 100644
index d5211be3918adcd030fc40c13cba1ff0d7c53c18..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenet2Arch(){
-
-  printf("********* Lenet-2 Architecture ********** \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  // Start power and performnce profiling 
-  startProfiling();
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  // NOTE: 'SAME' convolution
-  void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-
-  // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-  tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-  printTensorDims(conv1out);
-
-  void* conv1_reluout = tensorRelu(conv1out);
-  //dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout);  
-
-  void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool1out);
-  //dumpWeightsToFile("tensors_out/pool1.out", pool1out);  
-  // NOTE: input channels have to match between tensor op inputs and outputs 
-  void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-  tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-  printTensorDims(conv2out);
-
-  void* conv2_reluout = tensorRelu(conv2out);
-  //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout);  
-
-  void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool2out);
-  //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out);  
-  
-  void* gemm1out = tensorGemmGPU(pool2out, fc1_weights);  
-  printTensorDims(gemm1out);
-  //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out);  
-  
-  void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-  printTensorDims(gemm1biasout);
-
-  void* relu1out = tensorRelu(gemm1biasout);
-  printTensorDims(relu1out);
-  
-  void* gemm2out = tensorGemmGPU(relu1out, fc2_weights);  
-  printTensorDims(gemm2out);
-  
-  void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-  printTensorDims(gemm2_biasout);
-  
-  void* result = tensorSoftmax(gemm2_biasout);
-  printTensorDims(result);
-
-  // End profiling and dump output to profile.txt
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
-		  test_batch_size, result);
-  // THINK: I believe that comparing the results do not need to be part of the HPVM graph
-}
-
-
-int main(){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenet2Arch();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc
deleted file mode 100644
index 358cb6a75b8e63ca0a0bd964c9f73f2d16c39b4f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenet2Arch(){
-
-  printf("********* Lenet-2 Architecture ********** \n");
-
-  int test_batch_size = 10000;  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  // Start power and performnce profiling 
-  startProfiling();
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  // NOTE: 'SAME' convolution
-  void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-  dumpWeightsToFile("tensors_out/conv1_out.out", conv1out);  
-
-  tensorAdd(conv1out, conv1_bias);  // NOTE: In-place operation
-  printTensorDims(conv1out);
-
-  dumpWeightsToFile("tensors_out/conv1_bias_add.out", conv1out);  
-
-  void* conv1_reluout = tensorRelu(conv1out);
-  dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout);  
-
-  void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool1out);
-  dumpWeightsToFile("tensors_out/conv1_pool.out", pool1out);
-  
-  // NOTE: input channels have to match between tensor op inputs and outputs 
-  void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-  tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-  printTensorDims(conv2out);
-
-  void* conv2_reluout = tensorRelu(conv2out);
-  //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout);  
-
-  void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool2out);
-  //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out);  
-  
-  void* gemm1out = tensorGemmGPU(pool2out, fc1_weights);  
-  printTensorDims(gemm1out);
-  //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out);  
-  
-  void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-  printTensorDims(gemm1biasout);
-
-  void* relu1out = tensorRelu(gemm1biasout);
-  printTensorDims(relu1out);
-  
-  void* gemm2out = tensorGemmGPU(relu1out, fc2_weights);  
-  printTensorDims(gemm2out);
-  
-  void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-  printTensorDims(gemm2_biasout);
-  
-  void* result = tensorSoftmax(gemm2_biasout);
-  printTensorDims(result);
-
-  // End profiling and dump output to profile.txt
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
-		  test_batch_size, result);
-}
-
-
-int main(){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenet2Arch();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc
deleted file mode 100644
index 42e364289e499d92591692a04e42988fd1a66dc5..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../../tensor_runtime/include/tensor_runtime.h"
-#include "../../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenet2Arch(){
-
-  printf("********* Lenet-2 Architecture ********** \n");
-
-  int test_batch_size = 1000;  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_test_params/conv1.bin",
-					  float_type, 32, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_test_params/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_test_params/conv2.bin",
-					  float_type, 64, 32, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_test_params/conv2_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_test_params/fc1.bin",
-					 float_type, 1, 1, 7*7*64, 1024);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_test_params/fc1_bias.bin",
-				      float_type, 1, 1024, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_test_params/fc2.bin",
-					 float_type, 1, 1, 1024, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_test_params/fc2_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-
-
-  // Start power and performnce profiling 
-  startProfiling();
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-  // NOTE: 'SAME' convolution
-  void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-
-  // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-  tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-  printTensorDims(conv1out);
-
-  void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool1out);
-  //dumpWeightsToFile("tensors_out/pool1.out", pool1out);  
-  // NOTE: input channels have to match between tensor op inputs and outputs 
-  void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1,
-				     conv_mode, conv_precision);
-  tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-  printTensorDims(conv2out);
-
-  //void* conv2_reluout = tensorRelu(conv2out);
-
-  void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool2out);
-  //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out);  
-  
-  void* gemm1out = tensorGemmGPU(pool2out, fc1_weights);  
-  printTensorDims(gemm1out);
-  //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out);  
-  
-  void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-  printTensorDims(gemm1biasout);
-
-  void* relu1out = tensorRelu(gemm1biasout);
-  printTensorDims(relu1out);
-  
-  void* gemm2out = tensorGemmGPU(relu1out, fc2_weights);  
-  printTensorDims(gemm2out);
-  
-  void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias);
-  printTensorDims(gemm2_biasout);
-  
-  void* result = tensorSoftmax(gemm2_biasout);
-  printTensorDims(result);
-
-  // End profiling and dump output to profile.txt
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte",
-		  test_batch_size, result);
-  // THINK: I believe that comparing the results do not need to be part of the HPVM graph
-}
-
-
-int main(){
-
-  llvm_hpvm_initTensorRt(0);
-
-  testLenet2Arch();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc
deleted file mode 100644
index e8b70146a10359bf2df7420ae388325e6a658557..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-
-void testFCNetworkArchCPU(){
-
-  printf("********* Fully Connected DNN-1 ********* \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000;
- 
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-  					    float_type, test_batch_size, 1, 28, 28);  
-  void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin",
-						  float_type, 1, 1, 784, 128);  
-  void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
-					       float_type, 1, 128, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin",
-						  float_type, 1, 1, 128, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
-					       float_type, 1, 10, 1, 1);  
-
-  //dumpWeightsToFile("tensors_out/input_fc.out", input);
-  //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights);  
-
-  printTensorDims(input);
-  printTensorDims(fc1_weights);
-
-  // Start profiling tensor ops
-  startProfiling();
-  
-  // Layer-1
-  void* fc1out = tensorGemmCPU(input, fc1_weights);  
-  printTensorDims(fc1out);
-  //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out);  
-  
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out);  
-  printTensorDims(fc1_bias_out);
-
-  void* fc1_relu = tensorRelu(fc1_bias_out);
-  //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu);  
-  printTensorDims(fc1_relu);
- 
-  // Layer-2
-  void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights);  
-  //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out);  
-  printTensorDims(fc2out);
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out);  
-  printTensorDims(fc2_bias_out);
-
-  void* fc2_relu = tensorRelu(fc2_bias_out);
-  //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu);  
-  printTensorDims(fc2_relu);
-
-  void* result = tensorSoftmax(fc2_relu);
-  printTensorDims(result);
-
-  // stopProfiling
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-  // THINK: I believe that comparing the results do not need to be part of the HPVM graph
-}
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testFCNetworkArchGPU(){
-
-  printf("********* Fully Connected DNN-1 ********* \n");
-  // FIXIT: Extend this to batch of images - currently 5 images
-
-  int test_batch_size = 10000; 
-  void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin",
-  					    float_type, test_batch_size, 1, 28, 28);  
-
-  void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin",
-						  float_type, 1, 1, 784, 128);  
-  void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin",
-					       float_type, 1, 128, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin",
-						  float_type, 1, 1, 128, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin",
-					       float_type, 1, 10, 1, 1);  
-
-  // Start execution profiling Tensor ops
-  startProfiling();
-  
-  // Layer-1
-  void* fc1out = tensorGemmGPU(input, fc1_weights);  
-  printTensorDims(fc1out);
-  
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out);  
-  printTensorDims(fc1_bias_out);
-
-  void* fc1_relu = tensorRelu(fc1_bias_out);
-  //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu);  
-  printTensorDims(fc1_relu);
-
-  // IMPORTANT: Adding errors to the FC1 layer output
-  //tensorAddError(fc1_relu, 3);
- 
-  // Layer-2
-  void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);  
-  //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out);  
-  printTensorDims(fc2out);
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out);  
-  printTensorDims(fc2_bias_out);
-
-  void* fc2_relu = tensorRelu(fc2_bias_out);
-  //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu);  
-  printTensorDims(fc2_relu);
-
-  void* result = tensorSoftmax(fc2_relu);
-  printTensorDims(result);
-
-  stopProfiling();
-  
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-  // THINK: I believe that comparing the results do not need to be part of the HPVM graph
-}
-
-
-
-int main(){
-
-  // This initializes the runtime - must be called before anything
-  llvm_hpvm_initTensorRt(0);
-
-  //testFCNetworkArchCPU();
-
-  testFCNetworkArchGPU();
-
-  llvm_hpvm_cleanupTensorRt();
-  
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc
deleted file mode 100644
index fc00532a1b3712fab9d098a9a8e1a1586f1458a5..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-void test4LayerFC(){
-  // Runs a 4-layer fully-connected network (GEMM + bias add per layer, softmax at the end) and calls computeAccuracy on the result.
-  printf("********* 4-layer FC Network ********* \n");
-  // NOTE(review): an older FIXIT here said "currently 5 images"; test_batch_size below is 10000
-  // NOTE(review): this file is test_fc_network2.cc but every weight path below points at FC_network1/ - confirm this is intended
-  int test_batch_size = 10000;
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				float_type,
-				test_batch_size, 1, 28, 28);    
-  void* fc1_weights = readTrainedWeights("../model_params/FC_network1/fc1.bin",
-					 float_type, 1, 1, 784, 1000);  
-  void* fc1_bias = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin",
-				      float_type, 1, 1000, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/FC_network1/fc2.bin",
-						  float_type, 1, 1, 1000, 500);  
-  void* fc2_bias = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin",
-					       float_type, 1, 500, 1, 1);  
-  void* fc3_weights = readTrainedWeights("../model_params/FC_network1/fc3.bin",
-						  float_type, 1, 1, 500, 200);  
-  void* fc3_bias = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin",
-					       float_type, 1, 200, 1, 1);  
-  void* fc4_weights = readTrainedWeights("../model_params/FC_network1/fc4.bin",
-						  float_type, 1, 1, 200, 10);  
-  void* fc4_bias = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin",
-					       float_type, 1, 10, 1, 1);  
-  // Weight dims above chain 784 -> 1000 -> 500 -> 200 -> 10 (reading the last two dims as in/out - TODO confirm)
-  //dumpWeightsToFile("tensors_out/input_fc.out", input);
-  //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights);  
-
-  // Start Profiling execution times of Tensor operations
-  startProfiling();
-  
-  // Layer-1
-  void* fc1out = tensorGemmGPU(input, fc1_weights);  
-  printTensorDims(fc1out);
-  //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out);  
-  
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  printTensorDims(fc1_bias_out);
-  //dumpWeightsToFile("tensors_out/fc_fc1.out", fc1_bias_out);
-  // No activation is applied between layers here: each bias-add output feeds the next GEMM directly.
-  // Layer-2
-  void* fc2out = tensorGemmGPU(fc1_bias_out, fc2_weights);  
-  printTensorDims(fc2out);
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  printTensorDims(fc2_bias_out);
-
-  // Layer-3
-  void* fc3out = tensorGemmGPU(fc2_bias_out, fc3_weights);  
-  printTensorDims(fc3out);
-  
-  void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
-  printTensorDims(fc3_bias_out);
-
-  // Layer-4
-  void* fc4out = tensorGemmGPU(fc3_bias_out, fc4_weights);  
-  printTensorDims(fc4out);
-  
-  void* fc4_bias_out = tensorAdd(fc4out, fc4_bias);
-  printTensorDims(fc4_bias_out);
- 
-  void* result = tensorSoftmax(fc4_bias_out);
-  printTensorDims(result);
-
-  stopProfiling();
-  // computeAccuracy runs after stopProfiling(), i.e. outside the profiled region.
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-} 
-
-
-
-
-int main(){
-  // Entry point: initialize the tensor runtime, run test4LayerFC(), then clean up.
-  llvm_hpvm_initTensorRt(0);
-
-  test4LayerFC();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc
deleted file mode 100644
index 531bb01695cddb70de0f9bea90f6b229679e9bce..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-#include "../include/types.h"
-
-
-void test4LayerFC(){
-  // Runs a 4-layer fully-connected network (GEMM + bias add + ReLU per layer, softmax at the end) and calls computeAccuracy on the result.
-  printf("********* 4-layer FC Network ********* \n");
-  // NOTE(review): an older FIXIT here said "currently 5 images"; test_batch_size below is 10000
-
-  int test_batch_size = 10000;
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				float_type,
-				test_batch_size, 1, 28, 28);    
-  void* fc1_weights = readTrainedWeights("../model_params/FC_network3/fc1.bin",
-					 float_type, 1, 1, 784, 512);  
-  void* fc1_bias = readTrainedWeights("../model_params/FC_network3/fc1_bias.bin",
-				      float_type, 1, 512, 1, 1);  
-  void* fc2_weights = readTrainedWeights("../model_params/FC_network3/fc2.bin",
-					 float_type, 1, 1, 512, 256);  
-  void* fc2_bias = readTrainedWeights("../model_params/FC_network3/fc2_bias.bin",
-				      float_type, 1, 256, 1, 1);  
-  void* fc3_weights = readTrainedWeights("../model_params/FC_network3/fc3.bin",
-					 float_type, 1, 1, 256, 128);  
-  void* fc3_bias = readTrainedWeights("../model_params/FC_network3/fc3_bias.bin",
-				      float_type, 1, 128, 1, 1);  
-  void* fc4_weights = readTrainedWeights("../model_params/FC_network3/fc4.bin",
-					 float_type, 1, 1, 128, 10);  
-  void* fc4_bias = readTrainedWeights("../model_params/FC_network3/fc4_bias.bin",
-				      float_type, 1, 10, 1, 1);  
-  // Weight dims above chain 784 -> 512 -> 256 -> 128 -> 10 (reading the last two dims as in/out - TODO confirm)
-  // Start Profiling execution times of Tensor operations
-  startProfiling();
-  // Each layer below is GEMM -> bias add -> ReLU; the final layer is also passed through ReLU before softmax.
-  // Layer-1
-  void* fc1out = tensorGemmGPU(input, fc1_weights);  
-  printTensorDims(fc1out);
-  
-  void* fc1_bias_out = tensorAdd(fc1out, fc1_bias);
-  printTensorDims(fc1_bias_out);
-  void* fc1_relu = tensorRelu(fc1_bias_out);
- 
-  // Layer-2
-  void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights);  
-  printTensorDims(fc2out);
-  
-  void* fc2_bias_out = tensorAdd(fc2out, fc2_bias);
-  printTensorDims(fc2_bias_out);
-  void* fc2_relu = tensorRelu(fc2_bias_out);
-
-  // Layer-3
-  void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights);  
-  printTensorDims(fc3out);
-  
-  void* fc3_bias_out = tensorAdd(fc3out, fc3_bias);
-  printTensorDims(fc3_bias_out);
-  void* fc3_relu = tensorRelu(fc3_bias_out);
-
-  // Layer-4
-  void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights);  
-  printTensorDims(fc4out);
-  
-  void* fc4_bias_out = tensorAdd(fc4out, fc4_bias);
-  printTensorDims(fc4_bias_out);  
-  void* fc4_relu = tensorRelu(fc4_bias_out);
- 
-  void* result = tensorSoftmax(fc4_relu);
-  printTensorDims(result);
-
-  stopProfiling();
-  // computeAccuracy runs after stopProfiling(), i.e. outside the profiled region.
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-} 
-
-
-
-
-int main(){
-  // Entry point: initialize the tensor runtime, run test4LayerFC(), then clean up.
-  llvm_hpvm_initTensorRt(0);
-
-  test4LayerFC();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc
deleted file mode 100644
index e21b09fbf59c6ceee2adcf6df798ef04351a03ef..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-
-/* NOTE: Reference Architecture to use for profiling */
-void testLenetArch2(){
-  // LeNet-style pipeline: two conv + bias + pool stages, then GEMM + bias, ReLU, GEMM + bias, softmax, followed by computeAccuracy.
-  printf("********* Lenet Architecture ********** \n");
-  // NOTE(review): an older FIXIT here said "currently 5 images"; test_batch_size below is 10000
-
-  int test_batch_size = 10000;  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-				CUDNN_DATA_FLOAT,
-				test_batch_size, 1, 28, 28);
-
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin",
-					  CUDNN_DATA_FLOAT, 20, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin",
-					CUDNN_DATA_FLOAT, 1, 20, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin",
-					  CUDNN_DATA_FLOAT, 50, 20, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin",
-					CUDNN_DATA_FLOAT, 1, 50, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin",
-					 CUDNN_DATA_FLOAT, 1, 1, 800, 500);  
-  void* fc1_bias = readTrainedWeights("../model_params/lenet_params/ip1.bias.bin",
-				      CUDNN_DATA_FLOAT, 1, 1, 1, 500);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin",
-					 CUDNN_DATA_FLOAT, 1, 1, 500, 10);  
-  void* fc2_bias = readTrainedWeights("../model_params/lenet_params/ip2.bias.bin",
-				      CUDNN_DATA_FLOAT, 1, 1, 1, 10);  
-
-
-  // Start power and performance profiling 
-  startProfiling();
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-  // The four ints after the filter are presumably pad_h, pad_w, stride_h, stride_w - TODO confirm against tensor_runtime.h
-  void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1,
-				     conv_mode, conv_precision);
-  // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-  tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-  printTensorDims(conv1out);
-
-  void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool1out);
-
-  // NOTE: input channels have to match between tensor op inputs and outputs 
-  void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1,
-				     conv_mode, conv_precision);
-  tensorAdd(conv2out, conv2_bias); // NOTE: In place operation
-
-  printTensorDims(conv2out);
-
-  void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool2out);
-  // The fully-connected part uses tensorGemm/tensorGemmBias, whose return values are chained (unlike the ignored tensorAdd results above).
-  void* gemm1out = tensorGemm(pool2out, fc1_weights);  
-  printTensorDims(gemm1out);
-  
-  void* gemm1biasout = tensorGemmBias(gemm1out, fc1_bias);
-  printTensorDims(gemm1biasout);
-
-  void* relu1out = tensorRelu(gemm1biasout);
-  printTensorDims(relu1out);
-  
-  void* gemm2out = tensorGemm(relu1out, fc2_weights);  
-  printTensorDims(gemm2out);
-  
-  void* gemm2_biasout = tensorGemmBias(gemm2out, fc2_bias);
-  printTensorDims(gemm2_biasout);
-  
-  void* result = tensorSoftmax(gemm2_biasout);
-  printTensorDims(result);
-
-  // End profiling and dump output to profile.txt
-  stopProfiling();
-  // computeAccuracy runs after stopProfiling(), i.e. outside the profiled region.
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-  // THINK: I believe that comparing the results does not need to be part of the HPVM graph
-}
-
-
-/* This architecture REMOVES the bias adds */
-void testLenetArch3(){
-  // NOTE(review): described as removing the bias adds, but the conv1 bias add below is still present; conv2_bias is loaded and never used.
-  printf("********* Lenet Architecture ********** \n");
-  // NOTE(review): an older FIXIT here said "currently 5 images"; test_batch_size below is 10000
-
-  int test_batch_size = 10000;
-  
-  void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte",
-					 CUDNN_DATA_FLOAT,
-					 test_batch_size, 1, 28, 28);
-  // NOTE: Filter descriptors do NOT have batch size
-  // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels)
-  // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class
-  void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin",
-						   CUDNN_DATA_FLOAT, 20, 1, 5, 5);    
-  void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin",
-						 CUDNN_DATA_FLOAT, 1, 20, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin",
-						   CUDNN_DATA_FLOAT, 50, 20, 5, 5);  
-  void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin",
-						 CUDNN_DATA_FLOAT, 1, 50, 1, 1);  
-  void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin",
-						  CUDNN_DATA_FLOAT, 1, 1, 800, 500);  
-  void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin",
-						  CUDNN_DATA_FLOAT, 1, 1, 500, 10);  
-  // This variant makes no startProfiling()/stopProfiling() calls.
-  /* Convolution specific parameters */
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-  
-  void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1,
-				     conv_mode, conv_precision);
-  // NOTE: For tensorAdd, the only dimension that MUST match is channels  
-  tensorAdd(conv1out, conv1_bias); // NOTE: In place operation
-  printTensorDims(conv1out);
-
-  void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool1out);
-
-  // NOTE: input channels have to match between tensor op inputs and outputs 
-  void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1,
-				     conv_mode, conv_precision);
-  printTensorDims(conv2out);
-
-  void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2);
-  printTensorDims(pool2out);
-  // Fully-connected part: GEMM -> ReLU -> GEMM -> softmax, with no bias terms.
-  void* gemm1out = tensorGemm(pool2out, fc1_weights);  
-  printTensorDims(gemm1out);
-
-  void* relu1out = tensorRelu(gemm1out);
-  printTensorDims(relu1out);
-  
-  void* gemm2out = tensorGemm(relu1out, fc2_weights);  
-  printTensorDims(gemm2out);
-
-  void* result = tensorSoftmax(gemm2out);
-  printTensorDims(result);
-	 
-  computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result);
-  // THINK: I believe that comparing the results does not need to be part of the HPVM graph
-  printf("END of Lenet Arch3 -- \n");
-}
-
-
-int main(){
-  // Entry point: initialize the tensor runtime and run testLenetArch2(); the other test calls are commented out.
-  llvm_hpvm_initTensorRt(0);
-
-  //testTensorAdd();
-  //testTensorConv();
-  //testTensorPool();
-  //testTensorGemm();
-  //testTensorGemmBias();
-  //testTensorRelu();
-  //testTensorSoftmax();
-
-  //testLenetArch();
-  testLenetArch2();
-  //testLenetArch3();
-  // NOTE(review): no llvm_hpvm_cleanupTensorRt() call before returning - confirm whether the runtime needs explicit cleanup here.
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc
deleted file mode 100644
index 08e5817fc4aa037bc59cceafc1baba382696e329..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../include/utils.h"
-
-
-
-int total_runs = 1; // number of outer runs in testCifarNet(); main() overwrites it from argv[1] when given
-
-/* NOTE: Reference Architecture to use for profiling */
-void testCifarNet(){
-  // Six conv stages (bias add + tanh after each, pooling after stages 2, 4 and 6), then GEMM + bias add + softmax; accuracy is averaged over batches per run.
-  printf("********* Alexnet2 CIFAR-10 DNN ********** \n");
- 
-  std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); 
-  std::string input_path =  dir_prefix + std::string("norm_cifar_input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("test_labels.bin"); 
-  // Weight paths below are spelled out in full rather than built from dir_prefix.
-  void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin",
-					  float_type, 32, 3, 3, 3);  
-  void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin",
-					float_type, 1, 32, 1, 1);  
-  void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin",
-					  float_type, 32, 32, 3, 3);  
-  void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin",
-					float_type, 1, 32, 1, 1);
-  void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin",
-					  float_type, 64, 32, 3, 3);  
-  void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin",
-					float_type, 1, 64, 1, 1);  
-  void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin",
-					  float_type, 64, 64, 3, 3);  
-  void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin",
-					float_type, 1, 64, 1, 1);
-  void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin",
-					  float_type, 128, 64, 3, 3);  
-  void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin",
-					float_type, 1, 128, 1, 1);
-  void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin",
-					  float_type, 128, 128, 3, 3);  
-  void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin",
-					float_type, 1, 128, 1, 1);
-  
-  void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin",
-					 float_type, 1, 1, 2048, 10);  
-  void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin",
-				      float_type, 1, 10, 1, 1);  
- 
-  
-  int conv_mode = 1; // NOTE: using CROSS_CORRELATION
-  int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum
-
-
-  startMemTracking();
-
-  int test_input_size = 500;
-  int batch_size = 500;
-  int offset = 5000;
-  // batch_count below evaluates to 1 (500 / 500); the single batch spans input indices [offset, offset + batch_size) = [5000, 5500).
-  int batch_count = test_input_size / batch_size;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  // NOTE(review): startProfiling() is called once here, but stopProfiling() is called inside the j-loop (once per run) - confirm this is safe when total_runs > 1.
-  for(int j = 0; j < total_runs; j++){
-    
-    float final_accuracy = 0.0;
-    for(int i = 0; i < batch_count; i++){
-
-      int start = i * batch_size + offset;
-      int end = (i + 1) * batch_size + offset;
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    
-      void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv1out, conv1_bias); 
-      void* conv1_tanh = tensorTanh(conv1out);
-    
-      // 2nd Layer
-      void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv2out, conv2_bias); 
-      void* conv2_tanh = tensorTanh(conv2out);
-      void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2);
-     
-      // 3rd Layer
-      void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv3out, conv3_bias); 
-      void* conv3_tanh = tensorTanh(conv3out);
-
-      // 4th Layer
-      void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv4out, conv4_bias); 
-      void* conv4_tanh = tensorTanh(conv4out);
-      void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-      // 5th Layer
-      void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv5out, conv5_bias); 
-      void* conv5_tanh = tensorTanh(conv5out);
-
-      // 6th Layer
-      void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1,
-					 conv_mode, conv_precision);
-      tensorAdd(conv6out, conv6_bias); 
-  
-      void* conv6_tanh = tensorTanh(conv6out);
-      void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2);
-    
-      // final FC Layer
-      void* gemm1out = tensorGemmGPU(pool6out, fc1_weights);  
-      void* gemm1biasout = tensorAdd(gemm1out, fc1_bias);
-      void* result = tensorSoftmax(gemm1biasout);
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels, batch_size, result); 
-      final_accuracy += accuracy;
-      // Called at the end of every batch iteration, after the batch's accuracy has been accumulated.
-      freeBatchMemory();
-    }
-
-    stopProfiling();
-    // Per-run accuracy is the mean of the per-batch accuracies.
-    final_accuracy = final_accuracy / batch_count;
-    dumpFinalAccuracy(final_accuracy);
-  }
-
-
-  dumpExecutionAccuracies();
-     
-
-}
-
-
-
-int main(int argc, char* argv[]){
-  // Optional argv[1] overrides the global total_runs; atoi() yields 0 for non-numeric text, in which case testCifarNet's run loop executes zero times.
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-
-  llvm_hpvm_initTensorRt(0);
-
-  testCifarNet();
-
-  llvm_hpvm_cleanupTensorRt();
-
-  return 0;
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc
deleted file mode 100644
index d45cfa9ef3294c4c588b3abb98100dd8391529b7..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-
-int main(int argc, char* argv[]){ 
-  // Loads the network weights, then for each of total_runs runs evaluates the network per batch and dumps the averaged accuracy.
-  int total_runs = 1;
-  // Optional argv[1] overrides total_runs; atoi() yields 0 for non-numeric text, in which case the run loop executes zero times.
-  if (argc > 1){
-    printf("argv[1] = %s \n", argv[1]);
-    total_runs = atoi(argv[1]);
-    printf("total_runs  %d \n", total_runs);
-  }
-
-  
-  llvm_hpvm_initTensorRt(0); 
- 
- 
-  std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); 
-  //std::string input_path =  dir_prefix + std::string("alexnet_calib.bin");
-  //std::string labels_path =  dir_prefix + std::string("alexnet_train_labels.bin"); 
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv0.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv_bias0.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv3.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv_bias3.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv6.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv_bias6.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv7.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv_bias7.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv8.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv_bias8.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("fc12.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("fc_bias12.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 500;
-  int batch_size = 500;
-  int offset = 5000;
-  // batch_count below evaluates to 1 (500 / 500); the single batch spans input indices [offset, offset + batch_size) = [5000, 5500).
-  int batch_count = test_input_size / batch_size;
-
-  
-  // NOTE: Starting time profiling
-  startProfiling();
-  // NOTE(review): startProfiling() is called once here, but stopProfiling() is called inside the j-loop (once per run) - confirm this is safe when total_runs > 1.
-  for(int j = 0; j < total_runs; j++){
-
-    float final_accuracy = 0.0;
-    for(int i = 0; i < batch_count; i++){
-
-      int start = (i * batch_size) + offset;
-      int end = (i + 1) * batch_size + offset;
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);    
-      // Five conv stages (bias add + tanh after each; pooling after stages 1, 2 and 5), then GEMM + bias add + softmax.
-      void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); 
-      void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-      void* var_2 = tensorTanh(var_1); 
-      void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); 
-      void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); 
-      void* var_6 = tensorAdd(var_5, conv2d_2_b); 
-      void* var_7 = tensorTanh(var_6); 
-      void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); 
-      void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-      void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-      void* var_12 = tensorTanh(var_11); 
-      void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-      void* var_14 = tensorAdd(var_13, conv2d_4_b); 
-      void* var_15 = tensorTanh(var_14); 
-      void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-      void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-      void* var_18 = tensorTanh(var_17); 
-      void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); 
-      void* var_22 = tensorGemmGPU(var_19, dense_1_w); 
-      void* var_23 = tensorAdd(var_22, dense_1_b); 
-      void* var_24 = tensorSoftmax(var_23); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels,batch_size,var_24); 
-      final_accuracy += accuracy;
-      // Called at the end of every batch iteration, after the batch's accuracy has been accumulated.
-      freeBatchMemory();
-    }
-
-    stopProfiling();
-    // Per-run accuracy is the mean of the per-batch accuracies.
-    final_accuracy = final_accuracy / batch_count;
-    dumpFinalAccuracy(final_accuracy);
-
-  }
-
-  dumpExecutionAccuracies();
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc
deleted file mode 100644
index 689e241c5b4a0a5e1c5b98326998f37d5e803f75..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc
+++ /dev/null
@@ -1,265 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 1;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-  
-
-  llvm_hpvm_initTensorRt(0); 
-
-
-  /*int skip_tensor_ids[22];
-  skip_tensor_ids[0] = 0;
-  skip_tensor_ids[1] = 1;
-  skip_tensor_ids[2] = 1;
-  skip_tensor_ids[3] = 3;
-  skip_tensor_ids[4] = 4;
-  skip_tensor_ids[5] = 4;
-  skip_tensor_ids[6] = 10;
-  skip_tensor_ids[7] = 11;
-  skip_tensor_ids[8] = 17;
-  skip_tensor_ids[9] = 18;  
-  skip_tensor_ids[10] = 24;
-  skip_tensor_ids[11] = 25;
-  skip_tensor_ids[12] = 25;
-  skip_tensor_ids[13] = 33;
-  skip_tensor_ids[14] = 34;
-  skip_tensor_ids[15] = 35;
-  skip_tensor_ids[16] = 40;
-  skip_tensor_ids[17] = 41;
-  skip_tensor_ids[18] = 47;
-  skip_tensor_ids[19] = 48;  
-  //--- readSkipTensors(skip_tensor_ids, 22);
-  //-- readSkipTensors(skip_tensor_ids, 10);
-  readSkipTensors(skip_tensor_ids, 20);
-  */
-    
-  std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/"); 
-  // std::string input_path =  dir_prefix + std::string("resnet18_calib.bin"); 
-  // std::string labels_path =  dir_prefix + std::string("resnet18_train_labels.bin");
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin"); 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_14_w_path =  dir_prefix + std::string("conv2d_14_w.bin"); 
-  void* conv2d_14_w =  readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); 
-  std::string conv2d_14_b_path =  dir_prefix + std::string("conv2d_14_b.bin"); 
-  void* conv2d_14_b =  readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); 
-  std::string conv2d_15_w_path =  dir_prefix + std::string("conv2d_15_w.bin"); 
-  void* conv2d_15_w =  readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); 
-  std::string conv2d_15_b_path =  dir_prefix + std::string("conv2d_15_b.bin"); 
-  void* conv2d_15_b =  readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_17_w_path =  dir_prefix + std::string("conv2d_17_w.bin"); 
-  void* conv2d_17_w =  readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); 
-  std::string conv2d_17_b_path =  dir_prefix + std::string("conv2d_17_b.bin"); 
-  void* conv2d_17_b =  readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_16_w_path =  dir_prefix + std::string("conv2d_16_w.bin"); 
-  void* conv2d_16_w =  readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_16_b_path =  dir_prefix + std::string("conv2d_16_b.bin"); 
-  void* conv2d_16_b =  readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_18_w_path =  dir_prefix + std::string("conv2d_18_w.bin"); 
-  void* conv2d_18_w =  readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_18_b_path =  dir_prefix + std::string("conv2d_18_b.bin"); 
-  void* conv2d_18_b =  readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_19_w_path =  dir_prefix + std::string("conv2d_19_w.bin"); 
-  void* conv2d_19_w =  readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_19_b_path =  dir_prefix + std::string("conv2d_19_b.bin"); 
-  void* conv2d_19_b =  readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_20_w_path =  dir_prefix + std::string("conv2d_20_w.bin"); 
-  void* conv2d_20_w =  readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_20_b_path =  dir_prefix + std::string("conv2d_20_b.bin"); 
-  void* conv2d_20_b =  readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_21_w_path =  dir_prefix + std::string("conv2d_21_w.bin"); 
-  void* conv2d_21_w =  readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_21_b_path =  dir_prefix + std::string("conv2d_21_b.bin"); 
-  void* conv2d_21_b =  readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 500;
-  int batch_size = 500;
-  int offset = 5000;
-  
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-  // NOTE: Starting time profiling
-  startProfiling();
-  
-
-  for(int j = 0; j < total_runs; j++){
-    
-    float final_accuracy = 0.0;
-    for(int i = 0; i < batch_count; i++){
-
-      int start = i * batch_size + offset;
-      int end = (i + 1) * batch_size + offset;
-    
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32);
-    
-      void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-      void* var_3 = tensorAdd(var_2, conv2d_1_b); 
-      void* var_4 = tensorRelu(var_3); 
-      void* var_6 = tensorConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-      void* var_7 = tensorAdd(var_6, conv2d_2_b); 
-      void* var_8 = tensorRelu(var_7); 
-      void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-      void* var_11 = tensorAdd(var_10, conv2d_3_b); 
-      void* var_12 = tensorAdd(var_4, var_11); 
-      void* var_13 = tensorRelu(var_12); 
-      void* var_15 = tensorConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-      void* var_16 = tensorAdd(var_15, conv2d_4_b); 
-      void* var_17 = tensorRelu(var_16); 
-      void* var_19 = tensorConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-      void* var_20 = tensorAdd(var_19, conv2d_5_b); 
-      void* var_21 = tensorAdd(var_13, var_20); 
-      void* var_22 = tensorRelu(var_21); 
-      void* var_24 = tensorConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-      void* var_25 = tensorAdd(var_24, conv2d_6_b); 
-      void* var_26 = tensorRelu(var_25); 
-      void* var_28 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-      void* var_29 = tensorAdd(var_28, conv2d_7_b); 
-      void* var_30 = tensorAdd(var_22, var_29); 
-      void* var_31 = tensorRelu(var_30); 
-      void* var_33 = tensorConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); 
-      void* var_34 = tensorAdd(var_33, conv2d_8_b); 
-      void* var_35 = tensorRelu(var_34); 
-      void* var_37 = tensorConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-      void* var_38 = tensorAdd(var_37, conv2d_9_b); 
-      void* var_40 = tensorConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); 
-      void* var_41 = tensorAdd(var_40, conv2d_10_b); 
-      void* var_42 = tensorAdd(var_41, var_38); 
-      void* var_43 = tensorRelu(var_42); 
-      void* var_45 = tensorConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-      void* var_46 = tensorAdd(var_45, conv2d_11_b); 
-      void* var_47 = tensorRelu(var_46); 
-      void* var_49 = tensorConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-      void* var_50 = tensorAdd(var_49, conv2d_12_b); 
-      void* var_51 = tensorAdd(var_43, var_50); 
-      void* var_52 = tensorRelu(var_51); 
-      void* var_54 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-      void* var_55 = tensorAdd(var_54, conv2d_13_b); 
-      void* var_56 = tensorRelu(var_55); 
-      void* var_58 = tensorConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); 
-      void* var_59 = tensorAdd(var_58, conv2d_14_b); 
-      void* var_60 = tensorAdd(var_52, var_59); 
-      void* var_61 = tensorRelu(var_60); 
-      void* var_63 = tensorConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); 
-      void* var_64 = tensorAdd(var_63, conv2d_15_b); 
-      void* var_65 = tensorRelu(var_64); 
-      void* var_67 = tensorConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); 
-      void* var_68 = tensorAdd(var_67, conv2d_16_b); 
-      void* var_70 = tensorConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
-      void* var_71 = tensorAdd(var_70, conv2d_17_b); 
-      void* var_72 = tensorAdd(var_71, var_68); 
-      void* var_73 = tensorRelu(var_72); 
-      void* var_75 = tensorConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); 
-      void* var_76 = tensorAdd(var_75, conv2d_18_b); 
-      void* var_77 = tensorRelu(var_76); 
-      void* var_79 = tensorConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); 
-      void* var_80 = tensorAdd(var_79, conv2d_19_b); 
-      void* var_81 = tensorAdd(var_73, var_80); 
-      void* var_82 = tensorRelu(var_81); 
-      void* var_84 = tensorConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); 
-      void* var_85 = tensorAdd(var_84, conv2d_20_b); 
-      void* var_86 = tensorRelu(var_85); 
-      void* var_88 = tensorConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); 
-      void* var_89 = tensorAdd(var_88, conv2d_21_b); 
-      void* var_90 = tensorAdd(var_82, var_89); 
-      void* var_91 = tensorRelu(var_90); 
-      void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); 
-      void* var_94 = tensorGemmGPU(var_92, dense_1_w); 
-      void* var_95 = tensorAdd(var_94, dense_1_b); 
-      void* var_96 = tensorSoftmax(var_95); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels,batch_size,var_96); 
-      final_accuracy += accuracy;
-    
-      freeBatchMemory();
-    }
-
-    stopProfiling();
-
-    final_accuracy = final_accuracy / batch_count;
-    dumpFinalAccuracy(final_accuracy);
-  }
-  
-  dumpExecutionAccuracies();
-
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc
deleted file mode 100644
index 56e0e7016f16ce6548d9947e13fda96a931e436b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc
+++ /dev/null
@@ -1,167 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-  //std::string input_path =  dir_prefix + std::string("vgg16_cifar100_calib.bin"); 
-  //std::string labels_path =  dir_prefix + std::string("vgg16_cifar100_train_labels.bin");
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin");
-  
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 4000; 
-  int batch_size = 4000;
-  int offset = 5000;
-  
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size + offset; 
-    int end = (i + 1) * batch_size + offset; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-    void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-    void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-    void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-    void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-    void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-    void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-    void* var_18 = tensorRelu(var_17); 
-    void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-    void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-    void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-    void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-    void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-    void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-    void* var_34 = tensorRelu(var_33); 
-    void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-    void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-    void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-    void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-    void* var_42 = tensorRelu(var_41); 
-    void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-    void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-    void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-    void* var_50 = tensorRelu(var_49); 
-    void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-    void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-    void* var_55 = tensorAdd(var_54, dense_1_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-    void* var_59 = tensorAdd(var_58, dense_2_b); 
-    void* var_60 = tensorSoftmax(var_59); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    //float accuracy = computeAccuracy2(labels, batch_size, var_60, 100);
-    float accuracy = computeTop5Accuracy(labels, batch_size, var_60, 100);
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc
deleted file mode 100644
index 18e419553641160d59930a72695ec0a191c06d74..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-int main(){ 
-
-  llvm_hpvm_initTensorRt(0); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); 
-  //std::string input_path =  dir_prefix + std::string("vgg16_cifar100_calib.bin"); 
-  //std::string labels_path =  dir_prefix + std::string("vgg16_cifar100_train_labels.bin");
-
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin");
-  
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); 
-
-
-  startMemTracking(); 
-
-  int test_input_size = 5000; 
-  int batch_size = 5000;
-  int offset = 5000;
-  
-  int batch_count = test_input_size / batch_size; 
-  float final_accuracy = 0.0; 
-
-  for(int i = 0; i < batch_count; i++){ 
-
-    int start = i * batch_size + offset; 
-    int end = (i + 1) * batch_size + offset; 
-
-    void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); 
-
-    void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-    void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-    void* var_2 = tensorRelu(var_1); 
-    void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-    void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-    void* var_6 = tensorRelu(var_5); 
-    void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-    void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-    void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-    void* var_10 = tensorRelu(var_9); 
-    void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-    void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-    void* var_14 = tensorRelu(var_13); 
-    void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-    void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-    void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-    void* var_18 = tensorRelu(var_17); 
-    void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-    void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-    void* var_22 = tensorRelu(var_21); 
-    void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-    void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-    void* var_26 = tensorRelu(var_25); 
-    void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-    void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-    void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-    void* var_30 = tensorRelu(var_29); 
-    void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-    void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-    void* var_34 = tensorRelu(var_33); 
-    void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-    void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-    void* var_38 = tensorRelu(var_37); 
-    void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-    void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-    void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-    void* var_42 = tensorRelu(var_41); 
-    void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-    void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-    void* var_46 = tensorRelu(var_45); 
-    void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-    void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-    void* var_50 = tensorRelu(var_49); 
-    void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-    void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-    void* var_55 = tensorAdd(var_54, dense_1_b); 
-    void* var_56 = tensorRelu(var_55); 
-    void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-    void* var_59 = tensorAdd(var_58, dense_2_b); 
-    void* var_60 = tensorSoftmax(var_59); 
-
-    uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); 
-
-    float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); 
-    final_accuracy += accuracy; 
-    freeBatchMemory(); 
- 
-  }
-
-  final_accuracy = final_accuracy / batch_count; 
-  dumpFinalAccuracy(final_accuracy); 
-
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc
deleted file mode 100644
index 552001ba7af481845f75cd95e3249bc7ba7d0a97..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <unistd.h> 
-#include <fcntl.h> 
-#include <sys/types.h> 
-#include <sys/stat.h> 
-#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" 
-#include "../include/utils.h" 
-
-
-int main(int argc, char* argv[]){ 
-
-  int total_runs = 1;
-  if (argc > 1){
-    total_runs = atoi(argv[1]);
-  }
-  
-
-  llvm_hpvm_initTensorRt(1); 
-
-  std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); 
-  //std::string input_path =  dir_prefix + std::string("vgg16_cifar_calib.bin"); 
-  //std::string labels_path =  dir_prefix + std::string("vgg16_train_labels.bin");
-  std::string input_path =  dir_prefix + std::string("input.bin"); 
-  std::string labels_path =  dir_prefix + std::string("labels.bin");
- 
-  std::string conv2d_1_w_path =  dir_prefix + std::string("conv2d_1_w.bin"); 
-  void* conv2d_1_w =  readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); 
-  std::string conv2d_1_b_path =  dir_prefix + std::string("conv2d_1_b.bin"); 
-  void* conv2d_1_b =  readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_2_w_path =  dir_prefix + std::string("conv2d_2_w.bin"); 
-  void* conv2d_2_w =  readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); 
-  std::string conv2d_2_b_path =  dir_prefix + std::string("conv2d_2_b.bin"); 
-  void* conv2d_2_b =  readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); 
-  std::string conv2d_3_w_path =  dir_prefix + std::string("conv2d_3_w.bin"); 
-  void* conv2d_3_w =  readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); 
-  std::string conv2d_3_b_path =  dir_prefix + std::string("conv2d_3_b.bin"); 
-  void* conv2d_3_b =  readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_4_w_path =  dir_prefix + std::string("conv2d_4_w.bin"); 
-  void* conv2d_4_w =  readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); 
-  std::string conv2d_4_b_path =  dir_prefix + std::string("conv2d_4_b.bin"); 
-  void* conv2d_4_b =  readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); 
-  std::string conv2d_5_w_path =  dir_prefix + std::string("conv2d_5_w.bin"); 
-  void* conv2d_5_w =  readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); 
-  std::string conv2d_5_b_path =  dir_prefix + std::string("conv2d_5_b.bin"); 
-  void* conv2d_5_b =  readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_6_w_path =  dir_prefix + std::string("conv2d_6_w.bin"); 
-  void* conv2d_6_w =  readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_6_b_path =  dir_prefix + std::string("conv2d_6_b.bin"); 
-  void* conv2d_6_b =  readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_7_w_path =  dir_prefix + std::string("conv2d_7_w.bin"); 
-  void* conv2d_7_w =  readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); 
-  std::string conv2d_7_b_path =  dir_prefix + std::string("conv2d_7_b.bin"); 
-  void* conv2d_7_b =  readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); 
-  std::string conv2d_8_w_path =  dir_prefix + std::string("conv2d_8_w.bin"); 
-  void* conv2d_8_w =  readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); 
-  std::string conv2d_8_b_path =  dir_prefix + std::string("conv2d_8_b.bin"); 
-  void* conv2d_8_b =  readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_9_w_path =  dir_prefix + std::string("conv2d_9_w.bin"); 
-  void* conv2d_9_w =  readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_9_b_path =  dir_prefix + std::string("conv2d_9_b.bin"); 
-  void* conv2d_9_b =  readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_10_w_path =  dir_prefix + std::string("conv2d_10_w.bin"); 
-  void* conv2d_10_w =  readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_10_b_path =  dir_prefix + std::string("conv2d_10_b.bin"); 
-  void* conv2d_10_b =  readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_11_w_path =  dir_prefix + std::string("conv2d_11_w.bin"); 
-  void* conv2d_11_w =  readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_11_b_path =  dir_prefix + std::string("conv2d_11_b.bin"); 
-  void* conv2d_11_b =  readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_12_w_path =  dir_prefix + std::string("conv2d_12_w.bin"); 
-  void* conv2d_12_w =  readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_12_b_path =  dir_prefix + std::string("conv2d_12_b.bin"); 
-  void* conv2d_12_b =  readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); 
-  std::string conv2d_13_w_path =  dir_prefix + std::string("conv2d_13_w.bin"); 
-  void* conv2d_13_w =  readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); 
-  std::string conv2d_13_b_path =  dir_prefix + std::string("conv2d_13_b.bin"); 
-  void* conv2d_13_b =  readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_1_w_path =  dir_prefix + std::string("dense_1_w.bin"); 
-  void* dense_1_w =  readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); 
-  std::string dense_1_b_path =  dir_prefix + std::string("dense_1_b.bin"); 
-  void* dense_1_b =  readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); 
-  std::string dense_2_w_path =  dir_prefix + std::string("dense_2_w.bin"); 
-  void* dense_2_w =  readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); 
-  std::string dense_2_b_path =  dir_prefix + std::string("dense_2_b.bin"); 
-  void* dense_2_b =  readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); 
-
-
-  startMemTracking();
-
-  int test_input_size = 500;
-  int batch_size = 500;
-  int offset = 5000;
-  
-  int batch_count = test_input_size / batch_size;
-  float final_accuracy = 0.0;
-
-
-  for(int j = 0; j < total_runs; j++){
-    
-    float final_accuracy = 0.0;
-    for(int i = 0; i < batch_count; i++){
-
-      int start = i * batch_size + offset;
-      int end = (i + 1) * batch_size + offset;
-    
-      void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); 
- 
-      void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); 
-      void* var_1 = tensorAdd(var_0, conv2d_1_b); 
-      void* var_2 = tensorRelu(var_1); 
-      void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); 
-      void* var_5 = tensorAdd(var_4, conv2d_2_b); 
-      void* var_6 = tensorRelu(var_5); 
-      void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); 
-      void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); 
-      void* var_9 = tensorAdd(var_8, conv2d_3_b); 
-      void* var_10 = tensorRelu(var_9); 
-      void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); 
-      void* var_13 = tensorAdd(var_12, conv2d_4_b); 
-      void* var_14 = tensorRelu(var_13); 
-      void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); 
-      void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); 
-      void* var_17 = tensorAdd(var_16, conv2d_5_b); 
-      void* var_18 = tensorRelu(var_17); 
-      void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); 
-      void* var_21 = tensorAdd(var_20, conv2d_6_b); 
-      void* var_22 = tensorRelu(var_21); 
-      void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); 
-      void* var_25 = tensorAdd(var_24, conv2d_7_b); 
-      void* var_26 = tensorRelu(var_25); 
-      void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); 
-      void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); 
-      void* var_29 = tensorAdd(var_28, conv2d_8_b); 
-      void* var_30 = tensorRelu(var_29); 
-      void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); 
-      void* var_33 = tensorAdd(var_32, conv2d_9_b); 
-      void* var_34 = tensorRelu(var_33); 
-      void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); 
-      void* var_37 = tensorAdd(var_36, conv2d_10_b); 
-      void* var_38 = tensorRelu(var_37); 
-      void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); 
-      void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); 
-      void* var_41 = tensorAdd(var_40, conv2d_11_b); 
-      void* var_42 = tensorRelu(var_41); 
-      void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); 
-      void* var_45 = tensorAdd(var_44, conv2d_12_b); 
-      void* var_46 = tensorRelu(var_45); 
-      void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); 
-      void* var_49 = tensorAdd(var_48, conv2d_13_b); 
-      void* var_50 = tensorRelu(var_49); 
-      void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); 
-      void* var_54 = tensorGemmGPU(var_51, dense_1_w); 
-      void* var_55 = tensorAdd(var_54, dense_1_b); 
-      void* var_56 = tensorRelu(var_55); 
-      void* var_58 = tensorGemmGPU(var_56, dense_2_w); 
-      void* var_59 = tensorAdd(var_58, dense_2_b); 
-      void* var_60 = tensorSoftmax(var_59); 
-
-      uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); 
-
-      float accuracy = computeAccuracy2(labels,batch_size,var_60); 
-      final_accuracy += accuracy;
-    
-      freeBatchMemory();
-    }
-
-    final_accuracy = final_accuracy / batch_count;
-    dumpFinalAccuracy(final_accuracy);
-  }
-
-  dumpExecutionAccuracies();
-  
-  llvm_hpvm_cleanupTensorRt(); 
-
-  return 0; 
-
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt
deleted file mode 100644
index 095e037430dbf1751dddfd047d0cf0157ad9e2e7..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-cmake_minimum_required (VERSION 2.6)
-project (cudnn-training)
-
-find_package(CUDA 6.5 REQUIRED)
-
-
-if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-  message("Debug mode")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-g;-lineinfo;-Xcompiler;-ggdb;-lcurand)
-else()
-   set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand)
-endif()
-
-set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11  -I/  " )
-
-add_definitions(-DNO_INJECTION)
-add_definitions(-DPROMISE_TUNER_ENABLED)
-if(USE_GFLAGS)
-  add_definitions(-DUSE_GFLAGS)
-endif()
-
-if(USE_AUTOTUNER)
-  remove_definitions(-DNO_INJECTION)
-endif()
-
- 
-
-include_directories($ENV{CUDNN_PATH} /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/include)
-include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/./tensor_runtime/include)
-include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../gpu_profiler/include)
-include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../soc_simulator/include)
-link_directories($ENV{CUDNN_PATH} /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/lib /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/lib64)
-
-
-cuda_add_library(tensor_runtime /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu)
-cuda_add_cublas_to_target(tensor_runtime)
-
-cuda_add_library(tensor_cpu_runtime /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc)
-
-find_library(GPU_PROFILER_LIB
-    NAMES libgpu_profiler.a
-    HINTS /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../gpu_profiler/lib
-)
-
-find_library(SOC_SIMULATOR_LIB
-    NAMES libpromise_profiler.a
-    HINTS /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../soc_simulator/lib
-)
-
-
-if(USE_GFLAGS)
-  target_link_libraries(tensor_runtime gflags cudnn -lcurand)
-else()
-  target_link_libraries(tensor_runtime cudnn -lcurand)
-endif()
-
-target_link_libraries(tensor_cpu_runtime)
-
-# lenet_keras_half_autogenerated_knobs
-add_executable(lenet_keras_fp16_perf20 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf20.cc)
-target_link_libraries(lenet_keras_fp16_perf20 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf26 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf26.cc)
-target_link_libraries(lenet_keras_fp16_perf26 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf22 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf22.cc)
-target_link_libraries(lenet_keras_fp16_perf22 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf25 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf25.cc)
-target_link_libraries(lenet_keras_fp16_perf25 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf23 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf23.cc)
-target_link_libraries(lenet_keras_fp16_perf23 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp33 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp33.cc)
-target_link_libraries(lenet_keras_fp16_samp33 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf24 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf24.cc)
-target_link_libraries(lenet_keras_fp16_perf24 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp31 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp31.cc)
-target_link_libraries(lenet_keras_fp16_samp31 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf30 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf30.cc)
-target_link_libraries(lenet_keras_fp16_perf30 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp36 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp36.cc)
-target_link_libraries(lenet_keras_fp16_samp36 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf21 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf21.cc)
-target_link_libraries(lenet_keras_fp16_perf21 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp34 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp34.cc)
-target_link_libraries(lenet_keras_fp16_samp34 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp32 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp32.cc)
-target_link_libraries(lenet_keras_fp16_samp32 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_samp35 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp35.cc)
-target_link_libraries(lenet_keras_fp16_samp35 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf29 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf29.cc)
-target_link_libraries(lenet_keras_fp16_perf29 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf27 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf27.cc)
-target_link_libraries(lenet_keras_fp16_perf27 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(lenet_keras_fp16_perf28 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf28.cc)
-target_link_libraries(lenet_keras_fp16_perf28 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-
-
-# lenet_keras_autogenerated_knobs
-add_executable(lenet_keras_fp32_perf20 lenet_keras_autogenerated_knobs/lenet_keras_fp32_perf20.cc)
-target_link_libraries(lenet_keras_fp32_perf20 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py
deleted file mode 100644
index d787af8ec350b7fa2f2eeb2b0ed4c3ae4c015c95..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Automates online benchmark testing with different clock speeds
-# Input: GPU clock speed, DDR clock speed, set of benchmark names to test
-# Set of benchmarks format: (full_bin_name, half_bin_name)
-import os
-import sys
-
-from subprocess import Popen, PIPE
-
-def set_clock_speeds(gpu_speed_mhz, ddr_speed_mhz):
-    def find_closest_clock_speed(goal_speed):
-        # Reads /sys/devices/17000000.gp10b/devfreq/17000000.gp10b/available_frequencies
-        # and finds the closest clock speed
-        AVAIL_FREQS = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/available_frequencies"
-        avail_freqs_file = open(AVAIL_FREQS, "r")
-        avail_speeds_lst = avail_freqs_file.read().strip().split()
-        avail_freqs_file.close()
-
-        min_diff = abs(gpu_speed - int(avail_speeds_lst[0])) 
-        closest_speed = int(avail_speeds_lst[0])
-        for avail_speed in avail_speeds_lst[1:]:
-            avail_speed = int(avail_speed)
-            curr_diff = abs(gpu_speed - avail_speed)
-            if curr_diff < min_diff:
-                min_diff = curr_diff
-                closest_speed = avail_speed
-        return closest_speed
-
-    new_conf_filename = 'jetson_clocks_conf%d_%d.txt' % (gpu_speed_mhz, ddr_speed_mhz)
-    curr_conf_filename = "jetson_clocks_conf_backup.txt"
-    if os.path.isfile(curr_conf_filename):
-        os.remove(curr_conf_filename)
-
-    # Get the current configurations in a file 
-    sudo_password = 'nvidia'
-    p = Popen(['sudo', '/home/nvidia/jetson_clocks.sh', '--store', curr_conf_filename], \
-            stdin=PIPE, universal_newlines=True)
-    p.communicate(sudo_password + '\n')
-    assert p.returncode == 0
-
-    # Read the current config file in 
-    curr_conf_file = open(curr_conf_filename, "r")
-    curr_confs = curr_conf_file.read().strip().split('\n')
-    curr_conf_file.close()
-    
-    GPU_MIN_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/min_freq"
-    GPU_MAX_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/max_freq"
-    GPU_CUR_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/cur_freq"
-    
-    DDR_UPDATE_PATH = "/sys/kernel/debug/bpmp/debug/clk/emc/rate"
-
-    # Copy everything in the old configuration except for the GPU/DDR lines
-    new_conf_file = open(new_conf_filename, "w")
-    for line in curr_confs:
-		# Write the GPU clock frequencies at the end to configure the clocks even if
-		# the current configuration doesn't have one of the lines
-        if line.startswith(GPU_MIN_FREQ) or line.startswith(GPU_MAX_FREQ) or \
-					line.startswith(GPU_CUR_FREQ) or line.startswith(DDR_UPDATE_PATH):
-            continue
-        else:
-            new_conf_file.write("%s\n" % line)
-
-    MHZ_TO_HZ_MULT = 1000000
-    gpu_speed = gpu_speed_mhz * MHZ_TO_HZ_MULT
-    ddr_speed = ddr_speed_mhz * MHZ_TO_HZ_MULT
-
-	# Set GPU
-    closest_gpu_speed = find_closest_clock_speed(gpu_speed)
-    print("Setting GPU speed to %d" % closest_gpu_speed)
-    new_conf_file.write("%s:%d\n" % (GPU_MIN_FREQ, closest_gpu_speed))
-    new_conf_file.write("%s:%d\n" % (GPU_MAX_FREQ, closest_gpu_speed))
-    #new_conf_file.write("%s:%d\n" % (GPU_CUR_FREQ, closest_gpu_speed))
-
-	# Set DDR
-    new_conf_file.write("%s:%d\n" % (DDR_UPDATE_PATH, ddr_speed))
-    new_conf_file.close()
-
-    # Set the new configuration
-    p = Popen(['sudo', '/home/nvidia/jetson_clocks.sh', '--restore', new_conf_filename], \
-            stdin=PIPE, universal_newlines=True)
-    p.communicate(sudo_password + '\n')
-    assert p.returncode == 0
-    print("SUCCESSFULLY SET CLOCK SPEEDS")
-
-
-def run_benchmark(bin_name, should_print_bin_output):
-    print("RUNNING %s" % bin_name)
-    proc = Popen("./%s" % bin_name, stdout = PIPE, universal_newlines = True)
-    proc_output = proc.communicate()[0]
-    assert proc.returncode == 0
-    
-    if should_print_bin_output:
-		print(proc_output)
-    print("FINISHED RUNNING %s" % bin_name)
-    return proc_output    
-
-
-def parse_binary_output(proc_output):
-    avg_time_key_ind = proc_output.find("Average time:")
-    assert avg_time_key_ind >= 0
-    avg_time = proc_output[avg_time_key_ind : proc_output.find("\n", avg_time_key_ind)]
-    print(avg_time)
-    return avg_time
-
-
-# Input: a list of tuples of benchmark names
-# Can change to input a file containing benchmarks to run 
-def run_benchmarks(benchmarks_filename, output_filename, should_print_bin_output):
-    benchmarks_file = open(benchmarks_filename, "r")
-    output_file = open(output_filename, "w")
-
-    def parse_binary_names_tuple(tuple_line):
-        tuple_line = tuple_line.replace("(", "").replace(")", "").strip().split(',')
-        return tuple_line[0].strip(), tuple_line[1].strip()
-
-    for line in benchmarks_file:
-        full_bin_name, half_bin_name = parse_binary_names_tuple(line)
-        output_file.write("%s: %s\n" % (full_bin_name, \
-                parse_binary_output(run_benchmark(full_bin_name, should_print_bin_output))))
-        output_file.write("%s: %s\n" % (half_bin_name, \
-                parse_binary_output(run_benchmark(half_bin_name, should_print_bin_output))))    
-
-    benchmarks_file.close()
-    output_file.close()
-
-
-if __name__ == "__main__":
-    num_args = len(sys.argv)
-
-    if num_args != 5 and num_args != 6:
-        print("Usage: python online_benchmark_testing_automator.py <gpu freq in MHz> <ddr freq in MHz> <binary_names_file> <output_file> [1 to print binary output]")
-        print("Binary names file format: (full_binary_name, half_binary_name)<newline>")
-        exit(1)
-    print("GPU clock speed: %s" % sys.argv[1])
-    print("DDR clock speed: %s" % sys.argv[2])
-    print("Benchmarks file name: %s" % sys.argv[3])
-    print("Output file name: %s" % sys.argv[4])
-
-    set_clock_speeds(int(sys.argv[1]), int(sys.argv[2]))
-    run_benchmarks(sys.argv[3], sys.argv[4], num_args == 6 and sys.argv[-1] == "1")
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py
deleted file mode 100644
index 197b653d3bf6983a9500badcc4766bac1274fb63..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Automates online benchmark testing with different clock speeds
-# Input: set of benchmark names to test
-# Set of benchmarks format: (full_bin_name, half_bin_name)
-import os
-import sys
-
-from collections import defaultdict
-from subprocess import Popen, PIPE
-
-def run_benchmark(bin_name): 
-    print("RUNNING %s" % bin_name)
-    proc = Popen("./%s" % bin_name, stdout = PIPE, universal_newlines = True)
-    proc_output = proc.communicate()[0]
-    assert proc.returncode == 0
-    print("FINISHED RUNNING %s" % bin_name)
-    return proc_output    
-
-
-def parse_binary_output(proc_output, per_tensor):
-    final_acc_key_ind = proc_output.find("**** Final Accuracy")
-    assert final_acc_key_ind >= 0
-    final_acc = proc_output[final_acc_key_ind : proc_output.find("\n", final_acc_key_ind)]
-    print(final_acc)
-
-    if per_tensor:
-        first_op_ind = proc_output.find("Operation ")
-        total_op_ind = proc_output.find('\n', proc_output.find("Total energy"))
-        assert first_op_ind >= 0
-        assert total_op_ind >= 0
-
-        time_energy_output = proc_output[first_op_ind : total_op_ind]
-        print(time_energy_output)
-        return time_energy_output, final_acc
-
-    else:
-        avg_time_key_ind = proc_output.find("Average time:")
-        assert avg_time_key_ind >= 0
-
-        avg_time = proc_output[avg_time_key_ind : proc_output.find("\n", avg_time_key_ind)]
-        print(avg_time)
-
-        return avg_time, final_acc
-
-
-def get_sorted_binaries(builds_dir):
-    # dict of network names to lists of binaries
-    # list of binaries should be in sorted order (can do that when we run the benchmarks)
-    network_bins = defaultdict(list)
-    for bin_name in os.listdir(builds_dir):
-        if bin_name.find("profiling") == -1:
-            continue
-        network_name = bin_name[ : bin_name.rfind("_")]
-        network_bins[network_name].append(bin_name)
-    return network_bins
-
-
-# Input: a list of tuples of benchmark names
-# Can change to input a file containing benchmarks to run 
-def run_benchmarks(sorted_bins, builds_dir, output_filename, per_tensor):
-    def get_knob_id(bin_name):
-        return int(bin_name[bin_name.rfind("_") + 1 : ])
-
-    output_file = open(output_filename, "w", buffering = 0)
-    for network_name in sorted(sorted_bins.keys()):
-        # Sort the binaries in order by knob id
-        sorted_bins[network_name].sort(key = get_knob_id)
-        print("--------------------------------------")
-        print(network_name)
-        output_file.write("--------------------------------------\n%s\n" % network_name)
-
-        # Go through all binaries
-        for bin_name in sorted_bins[network_name]:
-            print(bin_name)
-            binary_output = run_benchmark(os.path.join(builds_dir, bin_name)) 
-            time_energy_output, final_acc = parse_binary_output(binary_output, per_tensor)
-            output_file.write("%s, %s, %s\n\n" % (bin_name, time_energy_output, final_acc))
-        print("--------------------------------------\n")
-        output_file.write("--------------------------------------\n\n")
-    output_file.close()
-
-if __name__ == "__main__":
-    num_args = len(sys.argv)
-
-    if num_args != 3 and num_args != 4:
-        print("Usage: python online_benchmark_testing_automator.py <builds dir> <outputs_file_name> [per_tensor]")
-        print("To delete autogen dirs: python online_benchmark_testing_automator.py clean")
-        exit(1)
-    print("Output file name: %s" % sys.argv[2])
-    sorted_bins = get_sorted_binaries(sys.argv[1])
-    run_benchmarks(sorted_bins, sys.argv[1], sys.argv[2], num_args == 4 and sys.argv[3] == "per_tensor")
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py
deleted file mode 100644
index 04f6c5eec378276cd0c89fcc7013cb6996a90f2f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Generates a CMakeLists.txt file for all generated files in a specific directory
-# Input: Arbitrarily long list containing names of all generated files directories
-# Ex: alexnet_cifar10_autogenerated_knobs mobilenet_cifar10_autogenerated_knobs
-# If inputted 0 parameters: Generates CMakeLists.txt file for all generated files in CURRENT dir
-
-import sys
-import os
-
-def get_all_generated_directory_names(): 
-    '''
-    Returns a list of all generated source code directories (<>_autogenerated_knobs)
-    in the current directory. Called when program is run with 0 args
-    '''
-    generated_dir_names = []
-    for dir_name in os.listdir("."):
-        print(dir_name)
-        if dir_name.endswith("autogenerated_knobs"):
-            generated_dir_names.append(dir_name)
-    return generated_dir_names
-
-
-def generate_cmakelists_setup(cmakelists_file):
-    '''
-    Copies over all the setup instructions (ex: finding libraries) from a "base" CMakeLists.txt
-    file. Ends copyng when we find the first instance of add_executable
-
-    Args:
-        cmakelists_file: File object to write cmake instructions to 
-
-    Assumption: All setup instructions are being any add_executable instructions
-    '''
-    BASE_CMAKELISTS_PATH = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt"
-    base_cmakelists_file = open(os.path.join(BASE_CMAKELISTS_PATH, "CMakeLists.txt"), "r")
-
-    find_lib_line = ""
-
-    for line in base_cmakelists_file:
-        if line.find("add_executable") != -1:
-            break
-
-        elif line.startswith("#"):
-            continue
-
-        # Special case: ignore / if -I flag exists
-        elif line.find("/") != -1 and line.find("-I") == -1: 
-            dot_dot_slash_ind = line.find("../")
-            dot_slash_ind = line.find("./")
-            if dot_dot_slash_ind != -1:
-                start_ind = dot_dot_slash_ind
-            elif dot_slash_ind != -1:
-                start_ind = dot_slash_ind
-            else:
-                slash_ind = line.find("/")
-                prev_space_ind = line[:slash_ind].rfind(" ")
-                start_ind = prev_space_ind + 1
-
-            old_rel_path = []
-            while start_ind < len(line):
-                if line[start_ind] == ")" or line[start_ind].isspace():
-                    break
-                old_rel_path.append(line[start_ind])
-                start_ind += 1
-            old_rel_path = ''.join(old_rel_path)
-            if os.path.isabs(old_rel_path):
-                cmakelists_file.write(line)
-            else:
-                new_path = os.path.join(BASE_CMAKELISTS_PATH, old_rel_path)
-                cmakelists_file.write(line.replace(old_rel_path, new_path))
-            continue
-        cmakelists_file.write(line)
-    base_cmakelists_file.close()
-
-
-def generate_cmakelists_file(cmakelists_file, source_file_dirs):
-    generate_cmakelists_setup(cmakelists_file)
-    LIBRARIES = "tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}"
-    cmake_instrs = []
-
-    for source_file_dir in source_file_dirs:
-        cmake_instrs.append("# %s" % source_file_dir)
-        for source_file in os.listdir(source_file_dir):
-            # Executable name = name of source code file without file extension
-            file_ext_ind = source_file.find(".cc")
-            if file_ext_ind == -1:
-                print("WARNING: Found file with wrong extension. Skipping. %s" % source_file)
-                continue
-            exec_name = source_file[ : file_ext_ind]
-            
-            source_file_path = os.path.join(source_file_dir, source_file)
-            cmake_instrs.append("add_executable(%s %s)" % (exec_name, source_file_path))
-            cmake_instrs.append("target_link_libraries(%s %s)\n" % (exec_name, LIBRARIES))
-        cmake_instrs.append("\n")
-    cmakelists_file.write('\n'.join(cmake_instrs))
-
-
-if __name__ == "__main__":
-    num_args = len(sys.argv)
-
-    if num_args >= 2 and sys.argv[1] == "--usage":
-        print("python cmakelists_generator.py <names of all generated files directories>")
-        print("If given no parameters: Generates CMakeLists.txt file for all generated files in CURRENT directory")
-        exit(1)
-
-    cmakelists_file = open("CMakeLists.txt", "w")
-    if num_args == 1:
-        generate_cmakelists_file(cmakelists_file, get_all_generated_directory_names())
-    else:
-        generate_cmakelists_file(cmakelists_file, sys.argv[1:])
-    cmakelists_file.close()
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt
deleted file mode 100644
index 563d7f4a03b3b3a50e2c08c76616a88ea7958b5a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
-../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
-../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
-../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
-../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
-../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
-../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt
deleted file mode 100644
index 4a0beb250e2241c7523e69b5262cb9ffc977d28d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc
-../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc
-../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt
deleted file mode 100644
index 20ca95abcf1ee1aab337fa391abb5f1a74583fe1..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc
-../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc
-../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
-../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt
deleted file mode 100644
index 506497e42889dc1d8bb2465912e87f56464e7ecc..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt
+++ /dev/null
@@ -1 +0,0 @@
-../dnn_sources/src/half/lenet_keras_half.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt
deleted file mode 100644
index 12b87930416c4269a62f2020a06b42cf5cf4dc13..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-../dnn_sources/src/profiling/alexnet2_profiling.cc
-../dnn_sources/src/profiling/alexnet_cifar10_profiling.cc
-../dnn_sources/src/profiling/mobilenet_cifar10_profiling.cc
-../dnn_sources/src/profiling/mobilenet_shallow_profiling.cc
-../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
-../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
-../dnn_sources/src/profiling/resnet18_cifar10_profiling.cc
-../dnn_sources/src/profiling/vgg16_cifar100_profiling.cc
-../dnn_sources/src/profiling/vgg16_cifar10_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt
deleted file mode 100644
index cd8f03c30712f0162db2cc8bcf563087be05bf64..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt
+++ /dev/null
@@ -1 +0,0 @@
-../dnn_sources/src/lenet_keras.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt
deleted file mode 100644
index a59f773cda240a311c0c873c9366494018b87312..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt
+++ /dev/null
@@ -1 +0,0 @@
-../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt
deleted file mode 100644
index 2b7382da3570917c1983ad0c3fe02763d8565635..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc
-../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt
deleted file mode 100644
index 32b18d4ca22672be6b44ecb674ea3ad00e18276d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc
-../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt
deleted file mode 100644
index 207eb1ed1f45ffde7dad0da4e125aa0ceaa5c5cd..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-perf,20 1,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,21 1,2,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,22 1,2,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,23 1,3,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,24 1,3,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,25 1,3,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,26 2,1,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,27 2,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,28 3,1,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,29 3,1,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,30 3,1,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,31 1,1,2,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,33 1,1,4,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,34 1,1,4,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,35 1,1,4,2     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt
deleted file mode 100644
index fc76565110cf34ab57024dd852c1a51b23a8f45e..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-samp,31 1,1,2,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,33 1,1,4,0     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,34 1,1,4,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,35 1,1,4,2     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt
deleted file mode 100644
index 72c43e61288c532feed94f5768357b3113d5de49..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-perf,20 1,1,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,21 1,2,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,22 1,2,1   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,23 1,3,0   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,24 1,3,1   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,25 1,3,2   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,26 2,1,0   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,27 2,1,1   2.25    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,28 3,1,0   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,29 3,1,1   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-perf,30 3,1,2   1.88    tensorHalfConvolution   tensorConvPerfCudaHalf
-samp,31 2,0     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,32 2,1     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,33 4,0     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,34 4,1     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,35 4,2     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,36 4,3     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,37 1,1     1.88    tensorHalfConvolution   tensorConvInputHalf
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt
deleted file mode 100644
index 0f0593226f6fbeddda91046e7416fe108bfb6d90..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-samp,31 2,0     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,32 2,1     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,33 4,0     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,34 4,1     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,35 4,2     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,36 4,3     1.88    tensorHalfConvolution   tensorConvInputHalf
-samp,37 1,1     1.88    tensorHalfConvolution   tensorConvInputHalf
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt
deleted file mode 100644
index a172a4e515ebfd24a51267da8bac2cb5f13ce6c0..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-perf,20 1,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,21 1,2,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,22 1,2,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,23 1,3,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,24 1,3,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,25 1,3,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,26 2,1,1,0   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,27 2,1,1,1   2.25    tensorHalfConvolution   tensorConvApproxHalf
-perf,28 3,1,1,0   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,29 3,1,1,1   1.88    tensorHalfConvolution   tensorConvApproxHalf
-perf,30 3,1,1,2   1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,32 1,1,2,1     1.88    tensorHalfConvolution   tensorConvApproxHalf
-samp,36 1,1,4,3     1.88    tensorHalfConvolution   tensorConvApproxHalf
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt
deleted file mode 100644
index 78f3e361ee8a96c6520793b435815210e1fc7117..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-perf,20 1,1,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,21 1,2,1,0   2.25    tensorConvolution   tensorConvApprox
-perf,22 1,2,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,23 1,3,1,0   1.88    tensorConvolution   tensorConvApprox
-perf,24 1,3,1,1   1.88    tensorConvolution   tensorConvApprox
-perf,25 1,3,1,2   1.88    tensorConvolution   tensorConvApprox
-perf,26 2,1,1,0   2.25    tensorConvolution   tensorConvApprox
-perf,27 2,1,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,28 3,1,1,0   1.88    tensorConvolution   tensorConvApprox
-perf,29 3,1,1,1   1.88    tensorConvolution   tensorConvApprox
-perf,30 3,1,1,2   1.88    tensorConvolution   tensorConvApprox
-samp,31 1,1,2,0     1.88    tensorConvolution   tensorConvApprox
-samp,32 1,1,2,1     1.88    tensorConvolution   tensorConvApprox
-samp,33 1,1,4,0     1.88    tensorConvolution   tensorConvApprox
-samp,34 1,1,4,1     1.88    tensorConvolution   tensorConvApprox
-samp,35 1,1,4,2     1.88    tensorConvolution   tensorConvApprox
-samp,36 1,1,4,3     1.88    tensorConvolution   tensorConvApprox
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt
deleted file mode 100644
index df001ba497d0ed440dd34beead33d607651d3f35..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt
+++ /dev/null
@@ -1 +0,0 @@
-perf,20 1,1,1,1   2.25    tensorConvolution   tensorConvApprox
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt
deleted file mode 100644
index 36a7dbca05ef71b6046a91066acf5382f2a5c7a3..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-perf,20 1,1,0   2.25    tensorConvolution   tensorConvPerfCuda
-perf,21 1,2,0   2.25    tensorConvolution   tensorConvPerfCuda
-perf,22 1,2,1   2.25    tensorConvolution   tensorConvPerfCuda
-perf,23 1,3,0   1.88    tensorConvolution   tensorConvPerfCuda
-perf,24 1,3,1   1.88    tensorConvolution   tensorConvPerfCuda
-perf,25 1,3,2   1.88    tensorConvolution   tensorConvPerfCuda
-perf,26 2,1,0   2.25    tensorConvolution   tensorConvPerfCuda
-perf,27 2,1,1   2.25    tensorConvolution   tensorConvPerfCuda
-perf,28 3,1,0   1.88    tensorConvolution   tensorConvPerfCuda
-perf,29 3,1,1   1.88    tensorConvolution   tensorConvPerfCuda
-perf,30 3,1,2   1.88    tensorConvolution   tensorConvPerfCuda
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt
deleted file mode 100644
index 913397cc4936bf11f3eefa15b5804700865e7b6b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt
+++ /dev/null
@@ -1 +0,0 @@
-fp16,12 0   1.5     tensorConvolution   tensorHalfConvolution
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt
deleted file mode 100644
index 6fbab7d7b85255cd86748634faea0bf48ed75e42..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-perf,20 1,1,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,21 1,2,1,0   2.25    tensorConvolution   tensorConvApprox
-perf,22 1,2,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,23 1,3,1,0   1.88    tensorConvolution   tensorConvApprox
-perf,24 1,3,1,1   1.88    tensorConvolution   tensorConvApprox
-perf,25 1,3,1,2   1.88    tensorConvolution   tensorConvApprox
-perf,26 2,1,1,0   2.25    tensorConvolution   tensorConvApprox
-perf,27 2,1,1,1   2.25    tensorConvolution   tensorConvApprox
-perf,28 3,1,1,0   1.88    tensorConvolution   tensorConvApprox
-perf,29 3,1,1,1   1.88    tensorConvolution   tensorConvApprox
-perf,30 3,1,1,2   1.88    tensorConvolution   tensorConvApprox
-samp,32 1,1,2,1     1.88    tensorConvolution   tensorConvApprox
-samp,36 1,1,4,3     1.88    tensorConvolution   tensorConvApprox
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt
deleted file mode 100644
index 68686b25de1c607e34d75044cd7ff19cf0c8890a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt
+++ /dev/null
@@ -1 +0,0 @@
-fp16,12 0   1.5     tensorHalfConvolution   tensorHalfConvolution
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py
deleted file mode 100644
index 589cdd0f4fe05cb8e9844ba9ac3dccd73133f09f..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py
+++ /dev/null
@@ -1,463 +0,0 @@
-# Input: file of the following table format
-#   id    knob configurations (arbitrary # of columns)   orig_func_name     new_func_name 
-# Input: file containing list of filenames to generate modified sources for 
-# Generates:
-#   a new directory called <original_source_nane>_different_knobs
-#   files named <original_source_name>_<id>.txt within their respective directories
-
-import glob
-import sys
-import os
-import re
-import shutil
-
-class Approx:
-    FP32 = 0
-    FP16 = 1
-    PERF = 2
-    SAMP = 3
-
-class KnobConfiguration:
-    '''
-    Stores the configurations as well as other useful information for each knob configuration
-    Stores: id (may factor out if ids are guaranteed to start at 0/1 and be consecutive)
-            original function name
-            modified function name
-            new function parameters (knobs)
-            new function call (modified function name(knobs)) 
-    '''
-    def __init__(self, raw_config):
-        '''
-        Args: raw_config = line of configuration file to parse
-        '''
-        line_as_lst = raw_config.strip().split()
-        # approx,<id> knob1,knob2,etc IGNORE old_fun_name new_fun_name
-
-        approx_id_lst = line_as_lst[0].split(',')
-        assert len(approx_id_lst) == 2
-
-        self.id = int(approx_id_lst[1])
-
-        if approx_id_lst[0] == "fp32":
-            self.approx = Approx.FP32
-            self.filename_ext = "fp32_converted"
-            return # special case 
-        elif approx_id_lst[0] == "fp16":
-            self.approx = Approx.FP16
-            self.filename_ext = "fp16_converted"
-            return # special case
-        elif approx_id_lst[0] == "perf":
-            self.approx = Approx.PERF
-        elif approx_id_lst[0] == "samp":
-            self.approx = Approx.SAMP
-
-        self.orig_func_name = line_as_lst[-2] # Second to last element
-        self.modified_func_name = line_as_lst[-1] # Last element  
-        self.params = line_as_lst[1].split(",") # First element = knob configuration 
-        self.filename_ext = approx_id_lst[0] + "_" + "_".join(self.params) # approx_method_knobs
-
-
-    # DEBUG
-    def __repr__(self):
-        if self.approx == Approx.FP32:
-            return "FP32"
-        elif self.approx == Approx.FP16:
-            return "FP16"
-
-        approx_type = None
-        if self.approx == Approx.PERF:
-            approx_type = "PERF"
-        elif self.approx == Approx.SAMP:
-            approx_type = "SAMP"
-        return "Approx: %s, ID: %d, Orig func nane: %s, Modified func nane: %s, Params: %s" \
-                % (approx_type, self.id, self.orig_func_name, self.modified_func_name, \
-                   ', '.join(self.params))
-
-
-def get_new_path(old_path, orig_source_code_dir):
-    '''
-    Returns a path that's compatible with the location of the generated source code
-
-    Args:
-        old_path: Original path of file that's being included
-        orig_source_code_dir: Path to original source code dir wrt the current dir
-    '''
-    if os.path.isabs(old_path): # Old path works
-        return old_path 
-    # Adding an extra .. because the path should be wrt the generated directory
-    return os.path.join("..", orig_source_code_dir, old_path)
-
-
-# "complete_line" = a valid line of code  
-def get_new_function_calls(complete_line, knob_config):
-    '''
-    Returns a copy of an inputted line of code such that all instances of old 
-    function calls are replaced with newFunctionCall(old params, knobs)
-
-    Note: The old calls aren't completely overriden, as we still need the old parameters but
-    insert new parameters as well
-
-    Args:
-        complete_line: A complete line of code to process
-        knob_config: KnobConfiguration object representing current configuration 
-    '''
-    orig_func_ind = complete_line.find(knob_config.orig_func_name)
-    new_line = []
-    line_start_ind = 0
-    last_ind = 0
-
-    while orig_func_ind != -1:
-        new_line.append(complete_line[line_start_ind : orig_func_ind])
-        line_start_ind = complete_line.find(")", orig_func_ind) + 1 
-        
-        old_func_call = complete_line[complete_line.find("(", orig_func_ind): line_start_ind]
-        if knob_config.modified_func_name == knob_config.orig_func_name:
-            # count the number of new parameters
-            num_repl_params = len(knob_config.params)
-            old_func_params = old_func_call.strip().split(',')
-            new_line.append("%s%s, %s)" % (knob_config.modified_func_name, ', '.join(old_func_params[:-num_repl_params]), ', '.join(knob_config.params)))
-
-        else:
-            new_line.append("%s%s, %s)" % (knob_config.modified_func_name, old_func_call[:-1], ', '.join(knob_config.params)))
-        orig_func_ind = complete_line.find(knob_config.orig_func_name, line_start_ind)
-    new_line.append(complete_line[line_start_ind : ])
-    return ''.join(new_line)
-
-
-def convert_local_paths(file_contents, orig_source_dir): 
-    '''
-    Converts all local paths wrt the original source file's directory to paths compatible
-    with the current source code directory
-
-    Args:
-        file_contents: String containing source code read from file
-        orig_source_dir: Path of original source code dir wrt the current directory 
-    '''
-    last_include_ind = file_contents.rfind("#include")
-    last_include_newline_ind = file_contents.find("\n", last_include_ind)
-    include_lines = file_contents[ : last_include_newline_ind].split("\n")
-    
-    new_file_contents = []
-    for line in include_lines:
-        if line.startswith("#"):
-            include_file = line.split()[1]
-            if include_file.startswith("\""):
-                new_include_path = get_new_path(include_file.replace("\"", ""), orig_source_dir.replace("\"", ""))
-                new_file_contents.append("#include \"%s\"\n" % new_include_path)
-            else:
-                new_file_contents.append(line)
-    new_file_contents.append(file_contents[last_include_newline_ind : ])
-    return '\n'.join(new_file_contents)
-
-
-def get_tensor_operation(line):
-    tensor_func_call = None
-    tensor_op_name = None
-    start_ind = None
-    end_ind = None
-
-    start_ind = line.find("tensor")
-    if start_ind != -1:
-        end_ind = line.find('(')
-        tensor_op_name = line[start_ind + len("tensor") : end_ind]
-        if tensor_op_name[0].isupper(): # crude way of checking whether we have a camel cased method
-            end_ind = line.find('(')
-            tensor_func_call = line[start_ind : end_ind]
-    return tensor_func_call, tensor_op_name, start_ind, end_ind
-
-
-def generate_fp32_source(new_file, source_file, orig_source_dir):
-    # Copy the source code over 
-    new_file_contents = convert_local_paths(source_file.read(), orig_source_dir)
-    new_file.write(new_file_contents)
-
-
-def generate_fp16_source(knob_config, new_file, source_file, orig_source_dir):
-    file_contents = convert_local_paths(source_file.read(), orig_source_dir).split('\n')
-
-    new_file_contents = []
-    for line in file_contents:
-        tensor_func_call, tensor_op_name, start_ind, end_ind = get_tensor_operation(line)
-        # tensorHalfSoftmax doesn't exist 
-        if line.find("Softmax") == -1 and tensor_func_call:
-            new_file_contents.append(line[ : start_ind] + "tensorHalf" + tensor_op_name \
-                        + line[end_ind : ])
-        else:
-            new_file_contents.append(line)
-    new_file.write('\n'.join(new_file_contents))
-
-
-def generate_approx_source(knob_config, new_file, source_file, orig_source_dir):
-    new_file_contents = []
-
-    # Store complete line to handle cases where one line of code is split into two lines
-    complete_line = ""
-    for line in source_file:
-        # Replace the current path of the local include with a path that's compatible 
-        # with the location of the generated source code 
-        if line.startswith("#"):
-            include_file = line.split()[1]
-            if include_file.startswith("\""):
-                new_include_path = get_new_path(include_file.replace("\"", ""), orig_source_dir.replace("\"", ""))
-                new_file_contents.append("#include \"%s\"\n" % new_include_path)
-            else:
-                new_file_contents.append(line)
-            continue
-
-        # Handles case where 1 actual line of code is split into 2 lines 
-        elif line.find("}") != -1 or line.find("{") != -1:
-            complete_line += line
-            new_file_contents.append(complete_line)
-            complete_line = ""
-            continue
-
-        elif line.find(";") == -1: # Last char is always \n
-            complete_line += line
-            continue
-
-        complete_line += line
-        orig_func_ind = complete_line.find(knob_config.orig_func_name)
-        if orig_func_ind != -1:
-            new_file_contents.append(get_new_function_calls(complete_line, knob_config))
-        else:
-            new_file_contents.append(complete_line)
-        complete_line = ""
-    new_file.write(''.join(new_file_contents))
-
-
-def generate_source_code(table, dir_name, filename, source_name, profile_per_tensor):
-    '''
-    Generates source code for all configurations in the table for one original source 
-    Args
-        table: List of KnobConfigurations
-        dir_name: Directory new sources should be placed in
-        filename: Filename of original source
-        source_name: Filename without the file extension (ex: foo/blah.cc --> blah)
-    '''
-    source_file = open(filename, "r") 
-    orig_source_dir = os.path.dirname(filename)
-
-    for knob_config in table:
-        source_file.seek(0, 0)
-        new_filename = os.path.join(dir_name, "%s_%s.cc" % (source_name, knob_config.id))
-        #new_filename = os.path.join(dir_name, "%s_%s.cc" % (source_name, knob_config.filename_ext)) 
-        new_file = open(new_filename, "w")
-        if knob_config.approx == Approx.FP16:
-            generate_fp16_source(knob_config, new_file, source_file, orig_source_dir)
-        elif knob_config.approx == Approx.FP32:
-            generate_fp32_source(new_file, source_file, orig_source_dir)
-        elif knob_config.approx == Approx.PERF or knob_config.approx == Approx.SAMP:
-            generate_approx_source(knob_config, new_file, source_file, orig_source_dir)
-
-        new_file.close() # Need to flush
-
-        if profile_per_tensor:
-            add_profiling_calls_per_tensor(new_filename)
-        print("Generated source code as %s" % new_filename)
-    source_file.close()
-
-
-def generate_all_sources(table, orig_files_filename, profile_per_tensor):
-    '''
-    Generates directories and source code for all original sources for all knob configurations
-    Args:
-        table: List of KnobConfiguration objects
-        orig_files_filename: Filename of file containing all original source names to generate new
-               sources for 
-    '''
-    orig_files = open(orig_files_filename, "r")
-    for orig_filename in orig_files:
-        orig_filename = orig_filename.strip()
-
-        # Source name = original filename without the .cc 
-        last_slash_ind = orig_filename.rfind("/")
-        file_ext_ind = orig_filename.find(".cc")
-        if last_slash_ind == -1:
-            source_name = orig_filename[ : file_ext_ind]
-        else:
-            source_name = orig_filename[last_slash_ind + 1 : file_ext_ind]
-        print("Source name: %s" % source_name)
-       
-        # Start with a clean directory
-        dir_name = "%s_autogenerated_knobs" % source_name
-        print("Setting up directory: %s" % dir_name)
-        if os.path.isdir(dir_name):
-            print("Directory exists: clearing everything")
-            for old_file in glob.glob(os.path.join(dir_name, "*")):
-                os.remove(old_file)
-
-        else:
-            print("Generating directory: %s" % dir_name)
-            os.makedirs(dir_name)
-            
-        generate_source_code(table, dir_name, orig_filename, source_name, profile_per_tensor)
-        print("\n")
-    orig_files.close()
-
-
-# This is a lazy approach but it works so ...
-def add_profiling_calls_per_tensor(source_filename):
-    source_file = open(source_filename, "r")
-    orig_source = source_file.read().split('\n')
-    source_file.close()
-
-    modified_source = []
-
-    init_profiler_cmd = "%sProfiler profiler;"
-    start_profiler_call = "%sprofiler.start_profiler();"
-    resume_profiler_call = "%sprofiler.resume_profiler();"
-    pause_profiler_call = "%sprofiler.pause_profiler();"
-    stop_profiler_call = "%sprofiler.stop_profiler();"
-    time_energy_profiler_call = "%sauto time_energy_%d = profiler.get_time_energy();"
-    reset_profiler_call = "%sprofiler.reset();"
-
-    time_var_decl = "%sdouble %s_time = 0.0;"
-    time_energy_decl = "%sdouble %s_energy = 0.0;"
-
-    time_incr_cmd = "%s%s_time += time_energy_%d.first;"
-    energy_incr_cmd = "%s%s_energy += time_energy_%d.second;"
-
-    output_per_tensor = "%sstd::cout << \"Operation %s, time: \" << (%s_time) / total_runs <<\", energy: \" << (%s_energy) / total_runs << std::endl; "
-    total_output = "%sstd::cout << \"Total %s: \" << (%s) / total_runs << std::endl;"
-
-    time_energy_count = 0
-
-    tensor_operations = set() 
-    for line in orig_source:
-        line = line.strip()
-        tensor_func_call, _, _, _ = get_tensor_operation(line)
-        if tensor_func_call: tensor_operations.add(tensor_func_call)
-
-    inserted_end_profiler_call = False
-    has_seen_for_loop = False 
-    close_bracket_count = 0
-    line_ind = 0
-
-    while line_ind < len(orig_source):
-        line = orig_source[line_ind]
-        num_leading_spaces = len(line) - len(line.lstrip())
-        leading_spaces_str = ' '.join(["" for _ in range(num_leading_spaces)])
-
-        if line.find("for") != -1:
-            has_seen_for_loop = True
-
-        if has_seen_for_loop and line.find("}") != -1:
-            close_bracket_count += 1
-
-        if line.find("#include") != -1:
-            modified_source.append(leading_spaces_str + line)
-            line_ind += 1
-            continue
-
-        if line.find("profiler") != -1 or line.find("total_time") != -1 or line.find("total_energy") != -1:
-            line_ind += 1
-            continue
-
-        if line.find("int total_runs") != -1:
-            # Now we insert the counters
-            for op_name in tensor_operations:
-                modified_source.append(time_var_decl % (leading_spaces_str, op_name))
-                modified_source.append(time_energy_decl % (leading_spaces_str, op_name))
-            modified_source.append(line)
-            modified_source.append(init_profiler_cmd % leading_spaces_str)
-            modified_source.append(start_profiler_call % leading_spaces_str)
-            line_ind += 1
-            continue
-
-        if close_bracket_count == 2 and not inserted_end_profiler_call: # NOTE this breaks if there are helper methods/scopes
-            modified_source.append(line)
-            total_time_str = []
-            total_energy_str = []
-
-            for op_name in tensor_operations:
-                modified_source.append(output_per_tensor % (leading_spaces_str, op_name, op_name, op_name))
-                total_time_str.append("%s_time" % op_name)
-                total_energy_str.append("%s_energy" % op_name)
-
-            modified_source.append(total_output % (leading_spaces_str, "time", ' + '.join(total_time_str)))
-            modified_source.append(total_output % (leading_spaces_str, "energy", ' + '.join(total_energy_str)))
-
-            modified_source.append(stop_profiler_call % leading_spaces_str)
-            line_ind += 1
-            inserted_end_profiler_call = True
-            continue
-
-        tensor_ind = line.find("tensor")
-        if tensor_ind == -1:
-            modified_source.append(line)
-            line_ind += 1
-            continue
-
-        word_after_tensor = line[tensor_ind + len("tensor")]
-        if word_after_tensor[0].isupper(): # crude way of checking whether we have a camel cased method
-            tensor_op = line[tensor_ind : line.find('(')]
-
-            modified_source.append(resume_profiler_call % leading_spaces_str)
-            modified_source.append(line) 
-            
-            # Address one line that's split up into 2 lines for readability 
-            if line.find(")") == -1 and line_ind + 1 < len(orig_source) \
-                        and orig_source[line_ind + 1].find(")") != -1:
-                line_ind += 1
-                modified_source.append(orig_source[line_ind]) 
-
-            modified_source.append(pause_profiler_call % leading_spaces_str)
-            modified_source.append(time_energy_profiler_call % (leading_spaces_str, time_energy_count))
-            modified_source.append(time_incr_cmd % (leading_spaces_str, tensor_op, time_energy_count))
-            modified_source.append(energy_incr_cmd % (leading_spaces_str, tensor_op, time_energy_count))
-            modified_source.append(reset_profiler_call % leading_spaces_str)
-            modified_source.append("")
-
-            time_energy_count += 1
-        else:
-            modified_source.append(line)
-        line_ind += 1
-
-    source_file = open(source_filename, "w")
-    source_file.write('\n'.join(modified_source))
-    source_file.close()
-
-
-def parse_table(table_filename):
-    '''
-    Given the filename of a table, parses the table into a list of KnobConfigurations 
-    '''
-    # Can we assume that the ids always start at 1 --> if so, can index by knobs 
-    # else: need to use a dict
-    table = []
-    table_file = open(table_filename, "r")
-    for raw_config in table_file:
-        table.append(KnobConfiguration(raw_config))
-    table_file.close()  
-    return table
-   
-
-def delete_autogenerated_dirs():
-    for dir_name in os.listdir("."):
-        if dir_name.endswith("profiling_autogenerated_knobs"):
-            print("DELETING %s" % dir_name)
-            shutil.rmtree(dir_name)
-    print("DONE")
-
-if __name__ == "__main__":
-    num_args = len(sys.argv)
-
-    if num_args == 2 and sys.argv[1] == "clean":
-        delete_autogenerated_dirs()
-        exit(0)
-
-    if num_args != 3 and num_args != 4:
-        print("Usage: python source_code_autogenerator.py <table file> <original filenames file> [per_tensor]")
-        print("To delete autogen dirs: python source_code_autogenerator.py clean")
-
-        if num_args >= 2 and sys.argv[1] == "--usage":
-            print("Table file format: <id> <knob configurations separated by spaces> <orig func name> <new func name>")
-            print("Original filenames file: <original_filename><newline> etc")
-        else:
-            print("Run with --usage flag for more detailed information")
-        exit(1)
-
-    profile_per_tensor = num_args == 4 and sys.argv[3] == "per_tensor"
-
-    table = parse_table(sys.argv[1])
-    generate_all_sources(table, sys.argv[2], profile_per_tensor)
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp
deleted file mode 100644
index f7da755535f3b31c86e4c82801458a02930dc02a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../../dnn_sources/include/utils.h"
-#include <vector>
-
-extern void llvm_hpvm_initTensorRt(int gpuid);
-
-extern void llvm_hpvm_clearRuntimeController();
-
-const size_t batch_size = 500;
-
-int main(int argc, char *argv[]) {
-	const char *input1_path = "../model_params/image_processing_5k";
-
-	llvm_hpvm_initTensorRt(0);
-	size_t bstart = 0;
-	startMemTracking();
-	while (true) {
-		Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-		if (batch == nullptr) // If end of dataset
-			break;
-
-		void* dataset = batch;
-		float sigma = 1.4;
-		size_t w = 5;
-		size_t h = 5;
-		size_t n_chan = 1;
-		int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-		auto *data = new float[w * h];
-		float sum = 0.0f;
-		for (int64_t i = -m; i <= m; i++)
-			for (int64_t j = -n; j <= n; j++) {
-				size_t idx = (i + m) * h + (j + n);
-				float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-				data[idx] = exp(exponent);
-				sum += data[idx];
-			}
-		if (sum != 0.0f)
-			for (size_t i = 0; i < w * h; i++)
-				data[i] /= sum;
-		Tensor* gaussian = (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-		Tensor *kernel_x, *kernel_y;
-
-		std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-		std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-		auto *t1 =
-			(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-		auto *t2 =
-			(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-		std::tie(kernel_x, kernel_y) = std::make_pair(t1, t2);
-
-		// 0. Grayscale
-		auto *summed_image = tensorReduce(dataset, 1, MathOp::Add, 0.0f);
-		auto *grayscale_image = tensorMap1(MathOp::Avg3, summed_image);
-		// 1. Denoise
-		auto *image2 =
-			tensorConvApprox(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0);
-		// 2. Get edge gradient / direction
-		auto *grad_x =
-			tensorConvApprox(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0);
-		auto *grad_y =
-			tensorConvApprox(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0);
-		auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y);
-		// 2.5. Normalize grad magnitude
-		auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max, 0.0f);
-		auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max, 0.0f);
-		auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max);
-		auto* result = grad_mag_norm;
-
-		bstart += batch_size;
-		freeBatchMemory();
-	}
-	llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc
deleted file mode 100644
index 18462e2c9afe983d34ceb461ed306078f8a50771..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-#include "../../tensor_runtime/include/tensor_runtime.h"
-#include "../../dnn_sources/include/utils.h"
-#include <vector>
-
-extern void llvm_hpvm_initTensorRt(int gpuid);
-
-extern void llvm_hpvm_clearRuntimeController();
-
-const size_t batch_size = 500;
-
-int main(int argc, char *argv[]) {
-	const char *input1_path = "../model_params/image_processing_5k";
-
-	llvm_hpvm_initTensorRt(0);
-	size_t bstart = 0;
-	startMemTracking();
-	while (true) {
-		Tensor *batch = readDataSet(input1_path, bstart, batch_size);
-		if (batch == nullptr) // If end of dataset
-			break;
-
-		void* dataset = batch;
-		float sigma = 1.4;
-		size_t w = 5;
-		size_t h = 5;
-		size_t n_chan = 1;
-		int64_t m = (w - 1) / 2, n = (h - 1) / 2;
-		auto *data = new float[w * h];
-		float sum = 0.0f;
-		for (int64_t i = -m; i <= m; i++)
-			for (int64_t j = -n; j <= n; j++) {
-				size_t idx = (i + m) * h + (j + n);
-				float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
-				data[idx] = exp(exponent);
-				sum += data[idx];
-			}
-		if (sum != 0.0f)
-			for (size_t i = 0; i < w * h; i++)
-				data[i] /= sum;
-		Tensor* gaussian = (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan);
-		Tensor *kernel_x, *kernel_y;
-
-		std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
-		std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
-		auto *t1 =
-			(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1);
-		auto *t2 =
-			(Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1);
-		std::tie(kernel_x, kernel_y) = std::make_pair(t1, t2);
-
-		// 0. Grayscale
-		auto *summed_image = tensorReduceHalf(dataset, 1, MathOp::Add, 0.0f);
-		auto *grayscale_image = tensorMap1Half(MathOp::Avg3, summed_image);
-		// 1. Denoise
-		auto *image2 =
-			tensorConvApproxHalf(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0);
-		// 2. Get edge gradient / direction
-		auto *grad_x =
-			tensorConvApproxHalf(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0);
-		auto *grad_y =
-			tensorConvApproxHalf(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0);
-		auto *grad_mag = tensorMap2Half(MathOp::Hypot, grad_x, grad_y);
-		// 2.5. Normalize grad magnitude
-		auto *grad_max_1D = tensorReduceHalf(grad_mag, 2, MathOp::Max, 0.0f);
-		auto *grad_max = tensorReduceHalf(grad_max_1D, 3, MathOp::Max, 0.0f);
-		auto *grad_mag_norm = tensorMap2Half(MathOp::Div, grad_mag, grad_max);
-		auto* result = grad_mag_norm;
-
-		bstart += batch_size;
-		freeBatchMemory();
-	}
-	llvm_hpvm_clearRuntimeController();
-}
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu
deleted file mode 100644
index 25432c4e3283bfd6062adcef1fcfe7326fc8737d..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu
+++ /dev/null
@@ -1,862 +0,0 @@
-
-
-#include "tensor_utils.h"
-#include "fp16_gemm.h"
-#include "debug.h"
-#include "global_data.h"
-#include "profiling.h"
-
-
-extern "C"{
-
-
-
-__global__
-void depthwise_conv(float* const __restrict__ y,
-const float* const __restrict__ x,
-const float* const __restrict__ w,
-const int B, const int M,
-const int H, const int W, const int KH,
-const int KW, const int H_out, const int W_out,
-const int H_pad, const int W_pad,
-const int H_stride, const int W_stride, const int start_batch)
-{
-
-#define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-#define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-const int num = 1;
-
-const int b = num * blockIdx.x + start_batch;
-const int m = blockIdx.y; //current filter/channel
-
-const int tx = threadIdx.x;
-
-const int start_h = (threadIdx.x / W_out) * H_stride - H_pad;
-const int start_w = (threadIdx.x % W_out) * W_stride - W_pad;
-
-float C[num] = { 0 };
-
-const float* weights = &w[m * KH * KW];
-
-for (int k = 0; k < KH * KW; k++) {
-int p = k / KW;
-int q = k % KW;
-
-#pragma unroll
-for (int i = 0; i < num; i++) {
-if (start_h + p > -1 && start_h + p < H &&
-start_w + q > -1 && start_w + q < W) {
-
-C[i] += x4d(b + i, m, start_h + p, start_w + q) * weights[k];
-}
-
-}
-}
-
-#pragma unroll
-for (int i = 0; i < num; i++) {
-if(b + i < B)
-y4d(b + i, m, 0, tx) = C[i];
-
-}
-
-
-#undef y4d 
-#undef x4d
-}
-
-
-__global__
-void depthwise_convNew(float* const __restrict__ y,
-const float* const __restrict__ x,
-const float* const __restrict__ w,
-const int B, const int M,
-const int H, const int W, const int KH,
-const int KW, const int H_out, const int W_out,
-const int H_pad, const int W_pad,
-const int H_stride, const int W_stride)
-{
-
-#define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-#define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-const int num = 12;
-
-const int b = num * blockIdx.x;
-const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out); 
-
-const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-const int start_h = (tx / W_out) * H_stride - H_pad;
-const int start_w = (tx % W_out) * W_stride - W_pad;
-
-float C[num] = { 0 };
-
-const float* weights = &w[m * KH * KW];
-
-for (int k = 0; k < KH * KW; k++) {
-int p = k / KW;
-int q = k % KW;
-
-if (start_h + p > -1 && start_h + p < H &&
-start_w + q > -1 && start_w + q < W) {
-
-#pragma unroll
-for (int i = 0; i < num; i++) {
-if(b + i < B)
-C[i] += x4d(b + i, m, start_h + p, start_w + q) * weights[k];
-}
-
-}
-}
-
-#pragma unroll
-for (int i = 0; i < num; i++) {
-if(b + i < B)
-y4d(b + i, m, 0, tx) = C[i];
-
-}
-
-#undef y4d 
-#undef x4d
-}
-
-
-
-
-__global__ void depthwise_convNew8_half(__half* const __restrict__ y,
-					const __half* const __restrict__ x,
-					const __half* const __restrict__ w,
-					const int B, const int M,
-					const int H, const int W, const int KH,
-					const int KW, const int H_out, const int W_out,
-					const int H_pad, const int W_pad,
-					const int H_stride, const int W_stride)
-{
-
-  #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-  #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-  const int num = 8;
-
-  const int b = num * blockIdx.x;
-  const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out);
-	
-  if(m < M){
-    const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-    const int start_h = (tx / W_out) * H_stride - H_pad;
-    const int start_w = (tx % W_out) * W_stride - W_pad;
-
-    __half c0 = 0;
-    __half c1 = 0;
-    __half c2 = 0;
-    __half c3 = 0;
-    __half c4 = 0;
-    __half c5 = 0;
-    __half c6 = 0;
-    __half c7 = 0;
-	
-    const __half* weights = &w[m * KH * KW];
-
-    for (int k = 0; k < KH * KW; k++) {
-      int p = k / KW;
-      int q = k % KW;
-
-      if (start_h + p > -1 && start_h + p < H &&
-	  start_w + q > -1 && start_w + q < W) {
-
-	c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0);
-	if(b + 1 < B)
-	  c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1);
-	if(b + 2 < B)
-	  c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2);
-	if(b + 3 < B)
-	  c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3);
-	if(b + 4 < B)
-	  c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4);
-	if(b + 5 < B)
-	  c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5);
-	if(b + 6 < B)
-	  c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6);
-	if(b + 7 < B)
-	  c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + q), weights[k], c7);
-    
-
-      }
-    }
-
-    y4d(b, m, 0, tx) = c0;	
-    if(b + 1 < B)
-      y4d(b + 1, m, 0, tx) = c1;
-    if(b + 2 < B)
-      y4d(b + 2, m, 0, tx) = c2;
-    if(b + 3 < B)
-      y4d(b + 3, m, 0, tx) = c3;
-    if(b + 4 < B)
-      y4d(b + 4, m, 0, tx) = c4;
-    if(b + 5 < B)
-      y4d(b + 5, m, 0, tx) = c5;
-    if(b + 6 < B)
-      y4d(b + 6, m, 0, tx) = c6;
-    if(b + 7 < B)
-      y4d(b + 7, m, 0, tx) = c7;
-  }
-	
-  #undef y4d 
-  #undef x4d
-}
-
-__global__ void depthwise_convNew8_half1(__half* const __restrict__ y,
-					const __half* const __restrict__ x,
-					const __half* const __restrict__ w,
-					const int B, const int M,
-					const int H, const int W, const int KH,
-					const int KW, const int H_out, const int W_out,
-					const int H_pad, const int W_pad,
-					const int H_stride, const int W_stride)
-{
-
-  #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-  #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-  const int num = 8;
-
-  const int b = num * blockIdx.x;
-  const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out);
-	
-  if(m < M){
-    const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-    const int start_h = (tx / W_out) * H_stride - H_pad;
-    const int start_w = (tx % W_out) * W_stride - W_pad;
-
-    __half c0 = 0;
-    __half c1 = 0;
-    __half c2 = 0;
-    __half c3 = 0;
-    __half c4 = 0;
-    __half c5 = 0;
-    __half c6 = 0;
-    __half c7 = 0;
-	
-    const __half* weights = &w[m * KH * KW];
-
-    for (int k = 0; k < KH * KW; k++) {
-      int p = k / KW;
-      int q = k % KW;
-
-      if (start_h + p > -1 && start_h + p < H &&
-	  start_w + q > -1 && start_w + q < W) {
-
-	c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0);
-      }
-    }
-
-    if(b + 1 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1);
-	}
-      }
-    }
-
-    if(b + 2 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2);
-	}
-      }
-    }
-
-    if(b + 3 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3);
-	}
-      }
-    }
-
-    if(b + 4 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4);
-	}
-      }
-    }
-
-    if(b + 5 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5);
-	}
-      }
-    }
-
-    if(b + 6 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6);
-	}
-      }
-    }
-
-    if(b + 7 < B){
-      for (int k = 0; k < KH * KW; k++) {
-	int p = k / KW;
-	int q = k % KW;
-
-	if (start_h + p > -1 && start_h + p < H &&
-	    start_w + q > -1 && start_w + q < W) {
-
-	  c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + q), weights[k], c7);
-	}
-      }
-    }
-
-    
-
-    y4d(b, m, 0, tx) = c0;	
-    if(b + 1 < B)
-      y4d(b + 1, m, 0, tx) = c1;
-    if(b + 2 < B)
-      y4d(b + 2, m, 0, tx) = c2;
-    if(b + 3 < B)
-      y4d(b + 3, m, 0, tx) = c3;
-    if(b + 4 < B)
-      y4d(b + 4, m, 0, tx) = c4;
-    if(b + 5 < B)
-      y4d(b + 5, m, 0, tx) = c5;
-    if(b + 6 < B)
-      y4d(b + 6, m, 0, tx) = c6;
-    if(b + 7 < B)
-      y4d(b + 7, m, 0, tx) = c7;
-  }
-	
-  #undef y4d 
-  #undef x4d
-}
-
-
-
-
-
-
-
-
-__global__ void depthwise_convNew12(float* const __restrict__ y,
-				    const float* const __restrict__ x,
-				    const float* const __restrict__ w,
-				    const int B, const int M,
-				    const int H, const int W, const int KH,
-				    const int KW, const int H_out, const int W_out,
-				    const int H_pad, const int W_pad,
-				    const int H_stride, const int W_stride)
-{
-
-  #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-  #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-  const int num = 12;
-
-  const int b = num * blockIdx.x;
-  const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out);
-	
-  if(m < M){
-    const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-    const int start_h = (tx / W_out) * H_stride - H_pad;
-    const int start_w = (tx % W_out) * W_stride - W_pad;
-
-    float c0 = 0;
-    float c1 = 0;
-    float c2 = 0;
-    float c3 = 0;
-    float c4 = 0;
-    float c5 = 0;
-    float c6 = 0;
-    float c7 = 0;
-    float c8 = 0;
-    float c9 = 0;
-    float c10 = 0;
-    float c11 = 0;
-	
-    const float* weights = &w[m * KH * KW];
-
-    for (int k = 0; k < KH * KW; k++) {
-      int p = k / KW;
-      int q = k % KW;
-
-      if (start_h + p > -1 && start_h + p < H &&
-	  start_w + q > -1 && start_w + q < W) {
-
-	c0 += x4d(b, m, start_h + p, start_w + q) * weights[k];
-	if(b + 1 < B)
-	  c1 += x4d(b + 1, m, start_h + p, start_w + q) * weights[k];
-	if(b + 2 < B)
-	  c2 += x4d(b + 2, m, start_h + p, start_w + q) * weights[k];
-	if(b + 3 < B)
-	  c3 += x4d(b + 3, m, start_h + p, start_w + q) * weights[k];
-	if(b + 4 < B)
-	  c4 += x4d(b + 4, m, start_h + p, start_w + q) * weights[k];
-	if(b + 5 < B)
-	  c5 += x4d(b + 5, m, start_h + p, start_w + q) * weights[k];
-	if(b + 6 < B)
-	  c6 += x4d(b + 6, m, start_h + p, start_w + q) * weights[k];
-	if(b + 7 < B)
-	  c7 += x4d(b + 7, m, start_h + p, start_w + q) * weights[k];
-	if(b + 8 < B)
-	  c8 += x4d(b + 8, m, start_h + p, start_w + q) * weights[k];
-	if(b + 9 < B)
-	  c9 += x4d(b + 9, m, start_h + p, start_w + q) * weights[k];
-	if(b + 10 < B)
-	  c10 += x4d(b + 10, m, start_h + p, start_w + q) * weights[k];
-	if(b + 11 < B)
-	  c11 += x4d(b + 11, m, start_h + p, start_w + q) * weights[k];
-    
-
-      }
-    }
-
-    y4d(b, m, 0, tx) = c0;	
-    if(b + 1 < B)
-      y4d(b + 1, m, 0, tx) = c1;
-    if(b + 2 < B)
-      y4d(b + 2, m, 0, tx) = c2;
-    if(b + 3 < B)
-      y4d(b + 3, m, 0, tx) = c3;
-    if(b + 4 < B)
-      y4d(b + 4, m, 0, tx) = c4;
-    if(b + 5 < B)
-      y4d(b + 5, m, 0, tx) = c5;
-    if(b + 6 < B)
-      y4d(b + 6, m, 0, tx) = c6;
-    if(b + 7 < B)
-      y4d(b + 7, m, 0, tx) = c7;
-    if(b + 8 < B)
-      y4d(b + 8, m, 0, tx) = c8;
-    if(b + 9 < B)
-      y4d(b + 9, m, 0, tx) = c9;
-    if(b + 10 < B)
-      y4d(b + 10, m, 0, tx) = c10;
-    if(b + 11 < B)
-      y4d(b + 11, m, 0, tx) = c11;
-	
-  }
-	
-  #undef y4d 
-  #undef x4d
-}
-
-
-__global__ void depthwise_convNew12_half(__half* const __restrict__ y,
-				    const __half* const __restrict__ x,
-				    const __half* const __restrict__ w,
-				    const int B, const int M,
-				    const int H, const int W, const int KH,
-				    const int KW, const int H_out, const int W_out,
-				    const int H_pad, const int W_pad,
-				    const int H_stride, const int W_stride)
-{
-
-  #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-  #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-  const int num = 12;
-
-  const int b = num * blockIdx.x;
-  const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out);
-	
-  if(m < M){
-    const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-    const int start_h = (tx / W_out) * H_stride - H_pad;
-    const int start_w = (tx % W_out) * W_stride - W_pad;
-
-    __half c0 = 0;
-    __half c1 = 0;
-    __half c2 = 0;
-    __half c3 = 0;
-    __half c4 = 0;
-    __half c5 = 0;
-    __half c6 = 0;
-    __half c7 = 0;
-    __half c8 = 0;
-    __half c9 = 0;
-    __half c10 = 0;
-    __half c11 = 0;
-	
-    const __half* weights = &w[m * KH * KW];
-
-    for (int k = 0; k < KH * KW; k++) {
-      int p = k / KW;
-      int q = k % KW;
-
-      if (start_h + p > -1 && start_h + p < H &&
-	  start_w + q > -1 && start_w + q < W) {
-
-	c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0);
-	if(b + 1 < B)
-	  c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1);
-	if(b + 2 < B)
-	  c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2);
-	if(b + 3 < B)
-	  c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3);
-	if(b + 4 < B)
-	  c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4);
-	if(b + 5 < B)
-	  c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5);
-	if(b + 6 < B)
-	  c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6);
-	if(b + 7 < B)
-	  c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + q), weights[k], c7);
-	if(b + 8 < B)
-	  c8 = __hfma(x4d(b + 8, m, start_h + p, start_w + q), weights[k], c8);
-	if(b + 9 < B)
-	  c9 = __hfma(x4d(b + 9, m, start_h + p, start_w + q), weights[k], c9);
-	if(b + 10 < B)
-	  c10 = __hfma(x4d(b + 10, m, start_h + p, start_w + q), weights[k], c10);
-	if(b + 11 < B)
-	  c11 = __hfma(x4d(b + 11, m, start_h + p, start_w + q), weights[k], c11);
-    
-
-      }
-    }
-
-    y4d(b, m, 0, tx) = c0;	
-    if(b + 1 < B)
-      y4d(b + 1, m, 0, tx) = c1;
-    if(b + 2 < B)
-      y4d(b + 2, m, 0, tx) = c2;
-    if(b + 3 < B)
-      y4d(b + 3, m, 0, tx) = c3;
-    if(b + 4 < B)
-      y4d(b + 4, m, 0, tx) = c4;
-    if(b + 5 < B)
-      y4d(b + 5, m, 0, tx) = c5;
-    if(b + 6 < B)
-      y4d(b + 6, m, 0, tx) = c6;
-    if(b + 7 < B)
-      y4d(b + 7, m, 0, tx) = c7;
-    if(b + 8 < B)
-      y4d(b + 8, m, 0, tx) = c8;
-    if(b + 9 < B)
-      y4d(b + 9, m, 0, tx) = c9;
-    if(b + 10 < B)
-      y4d(b + 10, m, 0, tx) = c10;
-    if(b + 11 < B)
-      y4d(b + 11, m, 0, tx) = c11;
-	
-  }
-	
-  #undef y4d 
-  #undef x4d
-}
-
-
-
-
-
-__global__ void depthwise_convNew4_half2(__half* const __restrict__ y,
-					const __half* const __restrict__ x,
-					const __half* const __restrict__ w,
-					const int B, const int M,
-					const int H, const int W, const int KH,
-					const int KW, const int H_out, const int W_out,
-					const int H_pad, const int W_pad,
-					const int H_stride, const int W_stride)
-{
-
-  #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0]
-  #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0]
-
-  const int num = 4;
-
-  const int b = num * blockIdx.x;
-  const int m = (blockIdx.y * blockDim.x  + threadIdx.x)/ (H_out * W_out);
-	
-  if(m < M){
-    const int tx = (blockIdx.y * blockDim.x  + threadIdx.x) % (H_out * W_out);
-
-    const int start_h = (tx / W_out) * H_stride - H_pad;
-    const int start_w = (tx % W_out) * W_stride - W_pad;
-
-    __half2 c0 = __half2half2(0);
-    __half2 c1 = __half2half2(0);
- 
-    const __half* weights = &w[m * KH * KW];
-
-    for (int k = 0; k < KH * KW; k++) {
-      int p = k / KW;
-      int q = k % KW;
-      if (start_h + p > -1 && start_h + p < H &&
-	  start_w + q > -1 && start_w + q < W) {
-
-      
-	__half2 t1;
-	__half2 t2;
-	if(b + 3 < B){
-	    t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q));
-	    t2 = __halves2half2(x4d(b + 3, m, start_h + p, start_w + q), x4d(b + 2, m, start_h + p, start_w + q));
-	 }
-	else if(b + 2 < B){
-	  t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q));
-	  t2 = __halves2half2(0, x4d(b + 2, m, start_h + p, start_w + q));
-
-	}
-	else if(b + 1 < B){
-	  t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q));
-	}
-	else{
-	  t1 = __halves2half2(0, x4d(b, m, start_h + p, start_w + q));
-
-	 }
-
-	
-	c0 = __hfma2(t1, __halves2half2(weights[k], weights[k]), c0);
-	c1 = __hfma2(t2, __halves2half2(weights[k], weights[k]), c1);
-	
-      }
-    }
-
-    y4d(b, m, 0, tx) = __high2half(c0);	
-    if(b + 1 < B)
-      y4d(b + 1, m, 0, tx) = __low2half(c0);
-    if(b + 2 < B)
-      y4d(b + 2, m, 0, tx) = __high2half(c1);
-    if(b + 3 < B)
-      y4d(b + 3, m, 0, tx) = __low2half(c1);
-
-  }
-	
-  #undef y4d 
-  #undef x4d
-}
-
-
-
-
-
-
-// Perforated Tensor Conv with 'perforation_rate' parameter
-void* tensorConvPerf(void* input_ptr, void* filter_ptr,
-		     int vertical_pad, int horizontal_pad,
-		     int vertical_stride, int horizontal_stride,
-		     int conv_mode, int conv_groups, int row, int col){
-
-  INFO("*** TensorConvolution \n");
-  profileEvent("tensorConv");
-
-  Tensor* input = (Tensor*) input_ptr;
-  Tensor* filter = (Tensor*) filter_ptr;
-
-  cudnnConvolutionDescriptor_t convDesc;
-  cudnnConvolutionFwdAlgo_t convAlgo;
-  cudnnConvolutionMode_t mode;
-  if(conv_mode == 0)
-    mode = CUDNN_CONVOLUTION;
-  else if(conv_mode == 1)
-    mode = CUDNN_CROSS_CORRELATION;
-
-  // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f, beta = 0.0f;
-
-  // TODO: Support other cases;
-  hostToDeviceCopy(input);
-  hostToDeviceCopy(filter);
-
-  INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride);
-
-  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
-
-  //FIXME: Current hack to preserve backward compatibilty
-  if(conv_groups == 0){
-    conv_groups = 1;
-  }
-
-  // NOTE: Adding support for grouped convolution
-  checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups));
-
-  int new_v = vertical_stride + row;
-  int new_h = horizontal_stride + col;
-  cudnnDataType_t computeType = CUDNN_DATA_FLOAT;
-  // FIXIT: Think if upscaling values need to be configurable?
-  // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used?
-  checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc,
-					     vertical_pad, horizontal_pad, // conv padding
-					     new_v, new_h, // conv strides
-					     1, 1, // upscaling values
-					     mode , // mode is configurable
-					     computeType)); // defines compute precision
-
-  int n, c, h, w; // output dimensions
-  // Find dimension of convolution output
-  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc,
-						   input->tensor_desc,
-						   filter->filter_desc,
-						   &n, &c, &h, &w));
-
-
-  DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
-
-  Tensor* output;
-  if(input->data_format == CUDNN_TENSOR_NCHW)
-    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
-				      CUDNN_TENSOR_NCHW, n, c, h, w);
-  else if(input->data_format == CUDNN_TENSOR_NHWC){
-    DEBUG("* NHWC Format \n");
-    output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
-				      CUDNN_TENSOR_NHWC, n, h, w, c);
-  }
-  else
-    ERROR("Unsupported Tensor Type");
-
-  // NOTE: Changing output tensor placement from host to device
-  changeTensorPlacement(output, DEVICE);
-  // NOTE: Necessary to insert the above call for every output tensor
-
-  DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %d, C = %d, H = %d, W = %d \n",
-	output->data_type, output->data_format, output->dims.dim_sizes[0], output->dims.dim_sizes[1],
-	output->dims.dim_sizes[2], output->dims.dim_sizes[3]);
-
-  if(convDesc == NULL || input->tensor_desc == NULL ||
-     filter->filter_desc == NULL || output->tensor_desc == NULL)
-    ERROR("NULL descriptor! \n");
-
-
-  // Debugging info prints
-  printTensorDescInfo(input);
-  printTensorDescInfo(filter);
-  printTensorDescInfo(output);
-
-  // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking
-  checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle,
-						 input->tensor_desc,
-						 filter->filter_desc,
-						 convDesc,
-						 output->tensor_desc,
-						 CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
-						 //CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
-						 0,
-						 &convAlgo));
-
-
-  DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", convAlgo,
-	CUDNN_CONVOLUTION_FWD_ALGO_FFT, CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
-	CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD);
-
-
-  // FIXIT: Algo shouldn't be hardcoded
-  convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
-
-  size_t workspace_size;
-  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle,
-						     input->tensor_desc,
-						     filter->filter_desc,
-						     convDesc,
-						     output->tensor_desc,
-						     convAlgo,
-						     &workspace_size));
-
-  // Allocating memory for the convolution workspace
-  void* workspace;
-  checkCudaErrors(cudaMalloc(&workspace, workspace_size));
-  DEBUG("workspace size = %d \n", workspace_size);
-
-
-  checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc,
-				     input->gpu_data, filter->filter_desc, filter->gpu_data,
-				     convDesc, convAlgo, workspace, workspace_size,
-				     &beta, output->tensor_desc, output->gpu_data));
-
-
-  int old_w = w;
-  int old_h = h;
-  h = (2 * vertical_pad + input->dims.dim_sizes[2] - filter->dims.dim_sizes[2]) / vertical_stride + 1;
-  w = (2 * horizontal_pad + input->dims.dim_sizes[3] - filter->dims.dim_sizes[3]) / horizontal_stride + 1;
-
-  Tensor* new_output;
-  if(input->data_format == CUDNN_TENSOR_NCHW)
-    new_output = (Tensor*) create4DTensor((cudnnDataType_t) float_type, //input->data_type,
-					  CUDNN_TENSOR_NCHW, n, c, h, w);
-  else if(input->data_format == CUDNN_TENSOR_NHWC){
-    DEBUG("* NHWC Format \n");
-    new_output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type,
-					  CUDNN_TENSOR_NHWC, n, h, w, c);
-  }
-  else
-    ERROR("Unsupported Tensor Type");
-
-
-  int numBlocks = (n * c * h * w  + 127) / 128;
-  if(vertical_stride == 0 && row == 0)
-    return output;
-
-  if(vertical_stride == 1 && row == 1){
-    interpolateRow<<<numBlocks,128>>>(n * c * h * w, old_h, n, c, h, w,
-				      (float *)output->gpu_data, (float *)new_output->gpu_data);
-  }
-  else if(horizontal_stride == 1 && col == 1){
-    interpolateCol<<<numBlocks,128>>>(n * c * h * w, old_w, n, c, h, w,
-				      (float *)output->gpu_data, (float *)new_output->gpu_data);
-  }
-  else if (col > 0){
-    interpolateXCol<<<numBlocks,128>>>(n * c * h * w, old_w, n, c, h, w,
-				       (float *)output->gpu_data, (float *)new_output->gpu_data, col + 1);
-  }
-  else{
-    interpolateXRow<<<numBlocks,128>>>(n * c * h * w, old_h, n, c, h, w,
-				       (float *)output->gpu_data, (float *)new_output->gpu_data, row + 1);
-  }
-
-
-  cudaDeviceSynchronize();
-
-  profileEvent("tensorConv_end", true);
-
-
-  changeTensorPlacement(new_output, DEVICE);
-  return new_output;
-
-}
-
-
-
-
-
-
-
-
-}
-
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu
deleted file mode 100644
index 5c6f0369384fd580f32ab2771a988e840a33076a..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu
+++ /dev/null
@@ -1,2121 +0,0 @@
-/* This file includes the API implementation of the HPVM tensor runtime built on cublas, cudnn
-**
-**  Author: Hashim Sharif
-**  Email: hsharif3@illinois.edu
-*/
-
-#include <stdio.h>
-#include <stdarg.h>
-#include <cstdio>
-#include <cstdlib>
-#include <cmath>
-#include <ctime>
-#include <cfloat>
-#include <algorithm>
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <map>
-#include <memory>
-#include <random>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <cuda_runtime.h>
-#include <device_launch_parameters.h>
-
-#include <cublas_v2.h>
-#include <cudnn.h>
-#include <cublas_api.h>
-#include <cuda_fp16.h>
-#include <driver_types.h>
-
-
-// Tensor runtime header files
-#include "tensor_runtime.h"
-#include "tensor_utils.h"
-#include "debug.h"
-#include "profiling.h"
-#include "fp16_conversion.h"
-#include "global_data.h"
-#include "error.h"
-#include "tensor.h"
-#include "op_overheads.h"
-#include "half_precision_api.h"
-#include "hpvm-rt-controller.h"
-#include "approxhpvm_runtime_utils.h" 
-#include "approx_api.h"
-
-
-// Image tensor runtime implementation
-#include "img_tensor_runtime.cu"
-
-//** Potential Improvements:
-//   1) Add support for datatypes beyond float and half
-//   2) Support for more CUDNN operations
-
-
-
-// Initializes the tensor runtime for the GPU with id `gpuid`.
-// The work is done only while `runtime_initialized` is false: the device is
-// selected, the global cuBLAS and cuDNN handles are created, the tuner flag
-// files are read (when the corresponding macros are defined) and finally
-// `runtime_initialized` is set.
-void llvm_hpvm_initTensorRt(int gpuid){
-
-  // Runtime already set up by an earlier call
-  if(runtime_initialized)
-    return;
-
-  printf("INITIALIZING GPU %d \n", gpuid);
-
-  // NOTE: Setting the target GPU. Can we use multiple GPUs?
-  checkCudaErrors(cudaSetDevice(gpuid));
-
-  // Create the cuBLAS and cuDNN handles
-  checkCudaErrors(cublasCreate(&cublasHandle));
-  checkCUDNN(cudnnCreate(&cudnnHandle));
-
-#ifdef PROMISE_TUNER_ENABLED
-  readOpenTunerFlags("promise_flags");
-#endif
-
-#ifdef ERROR_INJECTION_ENABLED
-  readOpenTunerFlags("opentuner_flags");
-#endif
-
-  runtime_initialized = true;
-}
-
-
-// Runtime teardown hook: logs a banner and calls dumpAccuracyNorms().
-// NOTE(review): no cuBLAS/cuDNN handle is destroyed here - confirm whether
-// that is intended.
-void llvm_hpvm_cleanupTensorRt(){
-  // The banner previously started with "\*", which is not a valid escape sequence
-  DEBUG("**** llvm_hpvm_cleanupTensorRt ***\n");
-  dumpAccuracyNorms();
-}
-
-
-// Initializes the tensor runtime on GPU `gpuid` and then sets the
-// `approxhpvm_runtime_mode` flag.
-void llvm_hpvm_initApproxhpvmRt(int gpuid){
-
-  llvm_hpvm_initTensorRt(gpuid);
-
-  approxhpvm_runtime_mode = true;
-}
-
-// Cleanup hook matching llvm_hpvm_initApproxhpvmRt(); currently a no-op.
-void llvm_hpvm_cleanupApproxhpvmRt(){
-  // Nothing to do
-}
-
-
-
-// Dumps the collected results by calling dump_result("accuracy_summary").
-void dumpAccuracyNorms(){
-  dump_result("accuracy_summary");
-}
-
-
-// Returns the device count reported by cudaGetDeviceCount()
-int getGPUCount(){
-  int device_count;
-  checkCudaErrors(cudaGetDeviceCount(&device_count));
-
-  return device_count;
-}
-
-
-
-// Empties the three pointer-tracking containers (`tensors_ptr`, `host_ptr`,
-// `obj_ptr`). Only the bookkeeping entries are dropped; nothing is freed here.
-void clearTensorMap(){
-  tensors_ptr.clear();
-  host_ptr.clear();
-  obj_ptr.clear();
-}
-
-
-// Resets all memory-tracking state: clears `tensors_ptr`, `host_ptr` and
-// `obj_ptr` (through clearTensorMap) and then clears `tracked_tensors`.
-void startMemTracking(){
-
-  clearTensorMap();
-
-  tracked_tensors.clear();
-}
-
-
-// Releases every pointer recorded in the tracking containers:
-//   - `tensors_ptr` entries are released with cudaFree()
-//   - `host_ptr` and `obj_ptr` entries are released with free()
-// Each slot is set to NULL afterwards; the containers are not cleared here.
-void freeOutputTensors(){
-
-  DEBUG("**** Freeing Ouput Tensors *** \n");
-
-  for(size_t idx = 0; idx < tensors_ptr.size(); ++idx){
-    cudaFree(tensors_ptr[idx]);
-    tensors_ptr[idx] = NULL;
-  }
-
-  for(size_t idx = 0; idx < host_ptr.size(); ++idx){
-    free(host_ptr[idx]);
-    host_ptr[idx] = NULL;
-  }
-
-  for(size_t idx = 0; idx < obj_ptr.size(); ++idx){
-    free(obj_ptr[idx]);
-    obj_ptr[idx] = NULL;
-  }
-}
-
-
-
-// Resets the per-operation state: zeroes `op_counter` and `total_ops` and
-// empties `op_accuracies`.
-void clearOpCounter(){
-  op_counter = 0;
-  total_ops = 0;
-
-  op_accuracies.clear();
-}
-
-
-
-// End-of-mini-batch cleanup.
-void freeBatchMemory(){
-
-  // Release the memory allocated while processing the current mini-batch
-  freeOutputTensors();
-
-  // Restart the OpenTuner flag counter for the next mini-batch
-  op_counter = 0;
-
-  // Drop the profiling data map
-  func_counters.clear();
-}
-
-
-
-
-// FIXIT: Fix any assumptions on the NCHW format
-// TODO: benchmark split performance and check if it is prohibitively high?
-//
-// Splits `tensor_ptr` into `num_splits` equally sized tensors along dimension
-// `split_dim`. The split is done on the host: the tensor is copied back with
-// deviceToHostCopy() and each piece is filled with memcpy.
-//
-// Only 4D tensors are supported because the pieces are built with
-// create4DTensor(). Returns a malloc'ed array holding `num_splits` Tensor*
-// (cast to void**), or NULL when the arguments are rejected.
-void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){
-
-  INFO("*** TensorSplit \n");
-  profileEvent("tensorSplit");
-
-  Tensor* tensor = (Tensor*) tensor_ptr;
-
-  // Reject arguments that would divide by zero or index outside dim_sizes
-  if(num_splits < 1 || split_dim < 0 || split_dim >= (int) tensor->dims.num_dims){
-    ERROR("Invalid num_splits = %d or split_dim = %d \n", num_splits, split_dim);
-    return NULL;
-  }
-
-  // create4DTensor below reads exactly four dimension sizes
-  if(tensor->dims.num_dims != 4){
-    ERROR("tensorSplit only supports 4D tensors \n");
-    return NULL;
-  }
-
-  deviceToHostCopy(tensor); // Splitting done on the host
-
-  Tensor** splits = (Tensor**) malloc(sizeof(Tensor*) * num_splits);
-  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensor->dims.num_dims);
-  if(splits == NULL || dim_sizes == NULL){
-    free(splits);
-    free(dim_sizes);
-    ERROR("Out of memory in tensorSplit \n");
-    return NULL;
-  }
-
-  for(unsigned int i = 0; i < tensor->dims.num_dims; i++){
-    dim_sizes[i] = tensor->dims.dim_sizes[i];
-  }
-
-  dim_sizes[split_dim] = tensor->dims.dim_sizes[split_dim] / num_splits;
-  if(dim_sizes[split_dim] < 1)
-    ERROR("Split Dimension < 1 after splitting");
-
-  // Bytes in one contiguous chunk of a split: element size times the
-  // (already reduced) sizes of split_dim and every dimension after it
-  size_t copy_size = getTypeSize(tensor->data_type);
-  for(unsigned int i = split_dim; i < tensor->dims.num_dims; i++){
-    copy_size = copy_size * dim_sizes[i];
-  }
-
-  for(int i = 0; i < num_splits; i++){
-    // FIXIT: Don't be specific to 4D tensors
-    // NOTE: Using same data format (NHWC/NCHW) for the split tensors
-    INFO("dim_sizes[0] = %lu, dim_sizes[1] = %lu, dim_sizes[2] = %lu, dim_sizes[3] = %lu \n",
-         (unsigned long) dim_sizes[0], (unsigned long) dim_sizes[1],
-         (unsigned long) dim_sizes[2], (unsigned long) dim_sizes[3]);
-
-    Tensor* split = (Tensor*) create4DTensor(tensor->data_type, tensor->data_format,
-                                             dim_sizes[0], dim_sizes[1],
-                                             dim_sizes[2], dim_sizes[3]);
-
-    // Split i owns every num_splits-th chunk of the source, starting at chunk i
-    size_t copy_start = (size_t) i * copy_size;
-    size_t copy_stride = (size_t) num_splits * copy_size;
-    INFO("copy_size = %lu, copy_start = %lu, copy_stride = %lu, tensor->size_in_bytes = %lu \n",
-         (unsigned long) copy_size, (unsigned long) copy_start,
-         (unsigned long) copy_stride, (unsigned long) tensor->size_in_bytes);
-
-    size_t index = 0;
-    while(copy_start + copy_size <= tensor->size_in_bytes){
-      memcpy(((char*) split->host_data + (index * copy_size)),
-             ((char*) tensor->host_data + copy_start),
-             copy_size);
-      copy_start += copy_stride;
-      index++;
-    }
-
-    splits[i] = split;
-  }
-
-  // The scratch copy of the dimensions is no longer needed
-  free(dim_sizes);
-
-  profileEvent("tensorSplit_end", true);
-
-  return (void**) splits;
-}
-
-
-// Concatenates `num_splits` tensors along dimension `split_dim` into a newly
-// created 4D tensor. The concatenation is done on the host: every input is
-// copied back with deviceToHostCopy() and the output is filled with memcpy.
-//
-// The data type, data format and dimensions of the result are taken from
-// tensors[0]; all inputs are assumed to have that same shape - TODO confirm
-// with callers. Returns the new Tensor*, or NULL when the arguments are rejected.
-void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){
-
-  INFO("*** TensorConcat \n");
-  profileEvent("tensorConcat");
-
-  Tensor** tensors = (Tensor**) tensors_ptr;
-
-  // tensors[0] is dereferenced below, so at least one input is required
-  if(tensors == NULL || num_splits < 1){
-    ERROR("tensorConcat needs at least one input tensor \n");
-    return NULL;
-  }
-
-  if(split_dim < 0 || split_dim >= (int) tensors[0]->dims.num_dims){
-    ERROR("Invalid split_dim = %d \n", split_dim);
-    return NULL;
-  }
-
-  // create4DTensor below reads exactly four dimension sizes
-  if(tensors[0]->dims.num_dims != 4){
-    ERROR("tensorConcat only supports 4D tensors \n");
-    return NULL;
-  }
-
-  for(int i = 0; i < num_splits; i++){
-    deviceToHostCopy(tensors[i]); // Concatenation done on the host
-  }
-
-  // The no of dimensions of concatenated tensor are the same
-  size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensors[0]->dims.num_dims);
-  if(dim_sizes == NULL){
-    ERROR("Out of memory in tensorConcat \n");
-    return NULL;
-  }
-
-  for(unsigned int i = 0; i < tensors[0]->dims.num_dims; i++){
-    dim_sizes[i] = tensors[0]->dims.dim_sizes[i];
-  }
-
-  // Bytes in one contiguous chunk of an input: computed from the input's own
-  // dimensions, i.e. before split_dim is scaled up for the output
-  size_t copy_size = getTypeSize(tensors[0]->data_type);
-  for(unsigned int i = split_dim; i < tensors[0]->dims.num_dims; i++){
-    copy_size = copy_size * dim_sizes[i];
-  }
-
-  dim_sizes[split_dim] = dim_sizes[split_dim] * num_splits;
-  if(dim_sizes[split_dim] < 1)
-    ERROR("Split Dimension < 1 after concat");
-
-  Tensor* output = (Tensor*) create4DTensor(tensors[0]->data_type, tensors[0]->data_format,
-                                            dim_sizes[0], dim_sizes[1],
-                                            dim_sizes[2], dim_sizes[3]);
-
-  INFO("dim_sizes[0] = %lu, dim_sizes[1] = %lu, dim_sizes[2] = %lu, dim_sizes[3] = %lu \n",
-       (unsigned long) dim_sizes[0], (unsigned long) dim_sizes[1],
-       (unsigned long) dim_sizes[2], (unsigned long) dim_sizes[3]);
-
-  // Number of chunks contributed by each input: product of the dimensions
-  // that come before split_dim
-  size_t num_copies = 1;
-  for(int i = 0; i < split_dim; i++){
-    num_copies = num_copies * dim_sizes[i];
-  }
-
-  size_t copy_stride = (size_t) num_splits * copy_size;
-  INFO("copy_size = %lu, num_copies = %lu, copy_stride = %lu, output->size_in_bytes = %lu \n",
-       (unsigned long) copy_size, (unsigned long) num_copies,
-       (unsigned long) copy_stride, (unsigned long) output->size_in_bytes);
-
-  // The scratch copy of the dimensions is no longer needed
-  free(dim_sizes);
-
-  for(size_t i = 0; i < num_copies; i++){
-    // FIXIT: Don't be specific to 4D tensors
-    size_t copy_start = i * copy_stride;
-
-    // Interleave chunk i of every input, in input order
-    for(int j = 0; j < num_splits; j++){
-      struct Tensor* split = tensors[j];
-      memcpy(((char*) output->host_data + copy_start + ((size_t) j * copy_size)),
-             ((char*) split->host_data + (i * copy_size)),
-             copy_size);
-    }
-  }
-
-  profileEvent("tensorConcat_end", true);
-
-  return output;
-}
-
-
-
-// Applies cuDNN cross-channel local response normalization to `input_ptr`.
-// `LRN_window`, `LRN_alpha`, `LRN_beta` and `LRN_k` are forwarded unchanged to
-// cudnnSetLRNDescriptor(). The result is written to a newly created float
-// NCHW tensor with the same four dimension sizes as the input, which is returned.
-void* tensorLRN(void* input_ptr, unsigned int LRN_window,
-                double LRN_alpha, double LRN_beta, double LRN_k){
-
-  INFO("*** TensorLRN \n");
-  profileEvent("tensorLRN");
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  hostToDeviceCopy(input);
-
-  // Blending factors for the cuDNN call: output = alpha * result + beta * output
-  float alpha = 1.0f, beta = 0.0f;
-  cudnnLRNDescriptor_t LRNDesc;
-  checkCUDNN(cudnnCreateLRNDescriptor(&LRNDesc));
-
-  INFO("window = %u, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n",
-       LRN_window, LRN_alpha, LRN_beta, LRN_k);
-
-  checkCUDNN(cudnnSetLRNDescriptor(LRNDesc, LRN_window, LRN_alpha, LRN_beta, LRN_k));
-
-  size_t* dim_sizes = input->dims.dim_sizes;
-  Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) float_type,
-                                            CUDNN_TENSOR_NCHW, dim_sizes[0], dim_sizes[1],
-                                            dim_sizes[2], dim_sizes[3]);
-  // NOTE: Changing output tensor placement from host to device
-  changeTensorPlacement(output, DEVICE);
-  // NOTE: Necessary to insert the above call for every output tensor
-
-  printTensorDescInfo(input);
-  printTensorDescInfo(output);
-
-  checkCUDNN(cudnnLRNCrossChannelForward(cudnnHandle, LRNDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
-                                         &alpha, input->tensor_desc, input->gpu_data,
-                                         &beta, output->tensor_desc, output->gpu_data));
-
-  // The descriptor is local to this call; release it to avoid leaking one per invocation
-  checkCUDNN(cudnnDestroyLRNDescriptor(LRNDesc));
-
-  profileEvent("tensorLRN_end", true);
-
-  return output;
-}
-
-
-// Prints the element count of the tensor followed by one line per dimension.
-void printTensorDims2(void* tensor_ptr){
-
-  struct Tensor* t = (struct Tensor*) tensor_ptr;
-
-  printf("Num_elems = %lu \n", t->num_elems);
-
-  int dim = 0;
-  while(dim < t->dims.num_dims){
-    printf("dim[%d] = %lu \n", dim, t->dims.dim_sizes[dim]);
-    dim++;
-  }
-}
-
-
-
-
-// FIXIT: tensorAdd currently only works for 4D tensors
-//
-// Adds `bias_ptr` into `x_ptr` in place using cudnnAddTensor (x = 1 * bias + 1 * x)
-// and returns `x_ptr`. Both tensors are copied to the device and converted to
-// FP32 first. When ERROR_INJECTION_ENABLED is defined, error is additionally
-// injected into `x` according to the accuracy flag of the current operation.
-void* tensorAdd(void* x_ptr, void* bias_ptr){
-
-  Tensor* x = (Tensor*) x_ptr;
-  Tensor* bias = (Tensor*) bias_ptr;
-
-  INFO("*** TensorAdd \n");
-  profileEvent("Add");
-
-  // Used as both scaling factors of cudnnAddTensor
-  float alpha = 1.0f;
-
-  hostToDeviceCopy(x);
-  hostToDeviceCopy(bias);
-
-  convertToFP32(x);
-  convertToFP32(bias);
-
-  // num_elems is printed with %lu elsewhere in this file; "%d" was a mismatch
-  INFO("x->num_elems = %lu \n", (unsigned long) x->num_elems);
-  INFO("bias->num_elems = %lu \n", (unsigned long) bias->num_elems);
-
-  if(cudnnHandle == NULL){
-    ERROR("cudnnHandle NOT initialized!! \n");
-  }
-
-  // FIXIT: routine fails for 3D tensors
-  checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc,
-                            bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data));
-
-  profileEvent("Add_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-
-  void* error_norms = tensorAddError(x, op_acc);
-  add_norms(error_norms, "tensorAdd", op_acc);
-  add_bias_overheads(x, op_acc);
-  op_counter++;
-
-  #endif
-
-  return x;
-}
-
-
-// FIXIT: Generalize all of the routines for types {half, float, double}
-//
-// Runs a cuDNN forward 2D convolution of `input_ptr` with `filter_ptr` and
-// returns a newly created float output tensor placed on the device.
-//
-//   vertical_pad / horizontal_pad       - convolution padding
-//   vertical_stride / horizontal_stride - convolution strides
-//   conv_mode   - currently ignored: CUDNN_CROSS_CORRELATION is always used
-//   conv_groups - group count for grouped convolution; 0 is treated as 1
-//
-// When ERROR_INJECTION_ENABLED is defined, error is additionally injected into
-// the output according to the accuracy flag of the current operation.
-void* tensorConvolution(void* input_ptr, void* filter_ptr,
-                        int vertical_pad, int horizontal_pad,
-                        int vertical_stride, int horizontal_stride,
-                        int conv_mode, int conv_groups){
-
-  INFO("*** TensorConvolution \n");
-  profileEvent("Conv");
-
-  Tensor* input = (Tensor*) input_ptr;
-  Tensor* filter = (Tensor*) filter_ptr;
-
-  cudnnConvolutionDescriptor_t convDesc;
-  cudnnConvolutionFwdAlgo_t convAlgo;
-
-  // The mode was previously derived from conv_mode and then unconditionally
-  // overwritten; keep the effective behavior and make it explicit
-  (void) conv_mode;
-  cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
-
-  // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f, beta = 0.0f;
-
-  // TODO: Support other cases;
-  hostToDeviceCopy(input);
-  hostToDeviceCopy(filter);
-
-  convertToFP32(input);
-  convertToFP32(filter);
-
-  // Both strides are int, so "%d" is the matching conversion
-  INFO("vertical_stride = %d, horizontal_stride = %d \n", vertical_stride, horizontal_stride);
-
-  checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc));
-
-  //FIXME: Current hack to preserve backward compatibilty
-  if(conv_groups == 0){
-    conv_groups = 1;
-  }
-
-  cudnnDataType_t computeType = CUDNN_DATA_FLOAT;
-  // FIXIT: Think if upscaling values need to be configurable?
-  // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used?
-  checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc,
-                                             vertical_pad, horizontal_pad, // conv padding
-                                             vertical_stride, horizontal_stride, // conv strides
-                                             1, 1, // upscaling values
-                                             mode,
-                                             computeType)); // defines compute precision
-
-  // NOTE: Adding support for grouped convolution
-  checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups));
-
-  int n, c, h, w; // output dimensions
-
-  if(input->tensor_desc == NULL || filter->filter_desc == NULL)
-    ERROR("Input or Filter descriptor is NULL");
-
-  // Find dimension of convolution output
-  checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc,
-                                                   input->tensor_desc,
-                                                   filter->filter_desc,
-                                                   &n, &c, &h, &w));
-
-  DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
-
-  Tensor* output = NULL;
-  if(input->data_format == CUDNN_TENSOR_NCHW)
-    output = (Tensor*) create4DTensor((cudnnDataType_t) float_type,
-                                      CUDNN_TENSOR_NCHW, n, c, h, w);
-  else if(input->data_format == CUDNN_TENSOR_NHWC){
-    DEBUG("* NHWC Format \n");
-    output = (Tensor*) create4DTensor((cudnnDataType_t) float_type,
-                                      CUDNN_TENSOR_NHWC, n, h, w, c);
-  }
-  else
-    ERROR("Unsupported Tensor Type");
-
-  // NOTE: Changing output tensor placement from host to device
-  changeTensorPlacement(output, DEVICE);
-  // NOTE: Necessary to insert the above call for every output tensor
-
-  DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %lu, C = %lu, H = %lu, W = %lu \n",
-        (int) output->data_type, (int) output->data_format,
-        (unsigned long) output->dims.dim_sizes[0],
-        (unsigned long) output->dims.dim_sizes[1],
-        (unsigned long) output->dims.dim_sizes[2],
-        (unsigned long) output->dims.dim_sizes[3]);
-
-  if(convDesc == NULL || input->tensor_desc == NULL ||
-     filter->filter_desc == NULL || output->tensor_desc == NULL)
-    ERROR("NULL descriptor! \n");
-
-  // Debugging info prints
-  printTensorDescInfo(input);
-  printTensorDescInfo(filter);
-  printTensorDescInfo(output);
-
-  // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking
-  checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle,
-                                                 input->tensor_desc,
-                                                 filter->filter_desc,
-                                                 convDesc,
-                                                 output->tensor_desc,
-                                                 CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
-                                                 0,
-                                                 &convAlgo));
-
-  DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", (int) convAlgo,
-        (int) CUDNN_CONVOLUTION_FWD_ALGO_FFT, (int) CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
-        (int) CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD);
-
-  // FIXIT: Algo shouldn't be hardcoded
-  // The algorithm queried above is only logged; implicit GEMM is always used
-  convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
-
-  size_t workspace_size;
-  checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle,
-                                                     input->tensor_desc,
-                                                     filter->filter_desc,
-                                                     convDesc,
-                                                     output->tensor_desc,
-                                                     convAlgo,
-                                                     &workspace_size));
-
-  // Allocating memory for the convolution workspace
-  void* workspace = NULL;
-  checkCudaErrors(cudaMalloc(&workspace, workspace_size));
-  DEBUG("workspace size = %lu \n", (unsigned long) workspace_size);
-
-  checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc,
-                                     input->gpu_data, filter->filter_desc, filter->gpu_data,
-                                     convDesc, convAlgo, workspace, workspace_size,
-                                     &beta, output->tensor_desc, output->gpu_data));
-
-  profileEvent("Conv_end", true);
-
-  // Release the per-call resources; previously both leaked on every invocation.
-  // Done after the profiling end event so the release is not attributed to "Conv".
-  checkCudaErrors(cudaFree(workspace));
-  checkCUDNN(cudnnDestroyConvolutionDescriptor(convDesc));
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-
-  void* error_norms = tensorAddError(output, op_acc);
-  add_norms(error_norms, "tensorConv", op_acc);
-  add_conv_overheads(input, filter, vertical_stride, horizontal_stride, op_acc);
-
-  op_counter++;
-
-  #endif
-
-  return output;
-}
-
-
-
-// NOTE: Supports Max and Avg Pooling
-//
-// Runs cuDNN forward pooling over `input_ptr` and returns a newly created
-// float NCHW output tensor placed on the device.
-//
-//   poolFunction  - 0: max pooling, 1: average pooling (padding excluded from
-//                   the count); any other value is reported through ERROR
-//   window_height / window_width        - pooling window size
-//   vertical_pad / horizontal_pad       - padding
-//   vertical_stride / horizontal_stride - strides
-//
-// When ERROR_INJECTION_ENABLED is defined, error is additionally injected into
-// the output according to the accuracy flag of the current operation.
-void* tensorPooling(void* input_ptr,
-                    int poolFunction,
-                    int window_height, int window_width,
-                    int vertical_pad, int horizontal_pad,
-                    int vertical_stride, int horizontal_stride){
-
-  INFO("*** TensorPooling \n");
-  profileEvent("Pool");
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  cudnnPoolingDescriptor_t poolDesc;
-  // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f, beta = 0.0f;
-
-  hostToDeviceCopy(input);
-
-  convertToFP32(input);
-
-  checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc));
-
-  // Output dimensions: (in + 2 * pad - window) / stride + 1
-  int n = input->dims.dim_sizes[0];
-  int c = input->dims.dim_sizes[1];
-  int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride;
-  h = h + 1;
-  int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride;
-  w = w + 1;
-
-  DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w);
-
-  // FIXIT: Don't be specific to floats
-  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w);
-  // Changing output tensor placement from host to device
-  changeTensorPlacement(output, DEVICE);
-
-  // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format
-  // FIXIT: Is this setTensor even needed?
-  checkCUDNN(cudnnSetTensor4dDescriptor(output->tensor_desc,
-                                        CUDNN_TENSOR_NCHW,
-                                        CUDNN_DATA_FLOAT,
-                                        n, c,
-                                        h, w));
-
-  // Initialized so that an unsupported poolFunction can never reach cuDNN
-  // with an indeterminate mode
-  cudnnPoolingMode_t pool_mode = CUDNN_POOLING_MAX;
-  if(poolFunction == 0)
-    pool_mode = CUDNN_POOLING_MAX;
-  else if(poolFunction == 1)
-    pool_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
-  else
-    ERROR("Pool function %d NOT supported \n", poolFunction);
-
-  checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc,
-                                         pool_mode,
-                                         CUDNN_PROPAGATE_NAN,
-                                         window_height, window_width,
-                                         vertical_pad, horizontal_pad,
-                                         vertical_stride, horizontal_stride));
-
-  checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input->tensor_desc,
-                                 input->gpu_data, &beta, output->tensor_desc, output->gpu_data));
-
-  // The descriptor is local to this call; release it to avoid leaking one per invocation
-  checkCUDNN(cudnnDestroyPoolingDescriptor(poolDesc));
-
-  profileEvent("Pool_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-  void* error_norms = tensorAddError(output, op_acc);
-  add_norms(error_norms, "tensorPooling", op_acc);
-  add_pool_overheads(input, window_height, vertical_stride, op_acc);
-
-  op_counter++;
-
-  #endif
-
-  return output;
-}
-
-
-
-
-// Host-side matrix multiply: output[m x n] = lhs[m x k] * rhs[k x n].
-// `m` is the first dimension of lhs, `k` is the product of its remaining
-// dimensions and `n` is the last dimension of rhs. Both operands are copied to
-// the host first; the result is a new m x n x 1 x 1 float tensor placed on the host.
-void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){
-
-  INFO("*** TensorGemmCPU \n");
-
-  Tensor* lhs = (Tensor*) lhs_ptr;
-  Tensor* rhs = (Tensor*) rhs_ptr;
-
-  // The multiply runs on the CPU, so bring both operands back to the host
-  deviceToHostCopy(lhs);
-  deviceToHostCopy(rhs);
-
-  if(lhs->data_type != CUDNN_DATA_FLOAT){
-    ERROR("Currently only Floating point is supported ");
-  }
-
-  profileEvent("tensorGemmCPU");
-
-  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
-  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
-
-  // Batch dimension - assuming NCHW format Tensors
-  int m = lhs->dims.dim_sizes[0];
-  // Output neurons: last dimension of rhs
-  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1];
-  // Input neurons: every lhs dimension after the batch dimension, flattened
-  int k = 1;
-  for (int dim = 1 ; dim < lhs->dims.num_dims; dim++){
-    k = k * lhs->dims.dim_sizes[dim];
-  }
-
-  // The second-to-last rhs dimension must match the flattened lhs width
-  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
-  INFO("m = %d, n = %d, k = %d \n", m, n, k);
-  if(rhs_k != k){
-    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
-  }
-
-  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
-  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
-  // The result is produced on the host, so mark the output as host-resident
-  changeTensorPlacement(output, HOST);
-
-  float* lhs_arr = (float*) lhs->host_data;
-  float* rhs_arr = (float*) rhs->host_data;
-  float* output_arr = (float*) output->host_data;
-
-  for(int row = 0; row < m; row++){
-    for(int col = 0; col < n; col++){
-      float sum = 0.0;
-      int inner = 0;
-      while(inner < k){
-        float mul = lhs_arr[row*k+inner] * rhs_arr[inner*n+col];
-        sum = sum + mul;
-        inner++;
-      }
-      output_arr[row*n+col] = sum;
-    }
-  }
-
-  profileEvent("tensorGemmCPU_end", true);
-
-  return output;
-}
-
-
-
-// Reference: https://gist.github.com/peterwittek/6303527
-//
-// GPU matrix multiply through cublasSgemm: output[m x n] = lhs[m x k] * rhs[k x n].
-// `m` is the first dimension of lhs, `k` is the product of its remaining
-// dimensions and `n` is the last dimension of rhs. Both operands are copied to
-// the device and converted to FP32; the result is a new m x n x 1 x 1 float
-// tensor placed on the device. When ERROR_INJECTION_ENABLED is defined, error
-// is additionally injected into the output.
-void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){
-
-  INFO("*** TensorGemmGPU \n");
-  profileEvent("Mul");
-
-  Tensor* lhs = (Tensor*) lhs_ptr;
-  Tensor* rhs = (Tensor*) rhs_ptr;
-
-  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
-  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
-
-  // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f;
-  float beta = 0.0f;
-
-  // Batch dimension - assuming NCHW format Tensors
-  int m = lhs->dims.dim_sizes[0];
-  // Output neurons: last dimension of rhs
-  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1];
-  // Input neurons: every lhs dimension after the batch dimension, flattened
-  int k = 1;
-  for (int dim = 1 ; dim < lhs->dims.num_dims; dim++){
-    k = k * lhs->dims.dim_sizes[dim];
-  }
-
-  // The second-to-last rhs dimension must match the flattened lhs width
-  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
-  INFO("m = %d, n = %d, k = %d \n", m, n, k);
-  if(rhs_k != k){
-    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
-  }
-
-  DEBUG("Creating new TENSOR * \n");
-  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
-
-  DEBUG("Changing placement *\n");
-  // The result is produced on the device, so mark the output as device-resident
-  changeTensorPlacement(output, DEVICE);
-
-  DEBUG("Changed Placement * \n\n");
-
-  hostToDeviceCopy(lhs);
-  hostToDeviceCopy(rhs);
-
-  convertToFP32(lhs);
-  convertToFP32(rhs);
-
-  DEBUG("CuBlasSgemm *\n");
-
-  // INFO: cuBlas uses column-major format
-  // INFO: The leading dimension is just the FIRST Dimension
-  // IMP: output is N * M in column-major format, M*N in row-major - what cuDNN expects
-  float* rhs_dev = (float*) rhs->gpu_data;
-  float* lhs_dev = (float*) lhs->gpu_data;
-  float* out_dev = (float*) output->gpu_data;
-  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
-                              n, m, k,
-                              &alpha,
-                              rhs_dev, n,
-                              lhs_dev, k,
-                              &beta,
-                              out_dev, n));
-
-  profileEvent("Mul_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-
-  void* error_norms = tensorAddError(output, op_acc);
-  add_norms(error_norms, "tensorGemm", op_acc);
-  add_gemm_overheads(lhs_ptr, rhs_ptr, op_acc);
-
-  op_counter++;
-
-  #endif
-
-  return output;
-}
-
-
-
-
-
-
-
-// GPU matrix multiply through cublasSgemm in which the rhs operand is passed
-// with CUBLAS_OP_T and a leading dimension of `k`. `m` is the first dimension
-// of lhs, `k` is the product of its remaining dimensions and `n` is the last
-// dimension of rhs. The result is a new m x n x 1 x 1 float tensor placed on
-// the device.
-// NOTE(review): unlike tensorGemmGPU, the operands are not converted to FP32
-// here and rhs is transposed - confirm the expected rhs layout with callers.
-void* tensorGemm(void* lhs_ptr, void* rhs_ptr){
-
-  INFO("*** TensorGemm \n");
-  profileEvent("tensorGemm");
-
-  Tensor* lhs = (Tensor*) lhs_ptr;
-  Tensor* rhs = (Tensor*) rhs_ptr;
-
-  INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims);
-  INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims);
-
-  // FIXIT: Need to be more aware of the implications of alpha and beta
-  float alpha = 1.0f;
-  float beta = 0.0f;
-
-  // Batch dimension - assuming NCHW format Tensors
-  int m = lhs->dims.dim_sizes[0];
-  // Output neurons: last dimension of rhs
-  int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1];
-  // Input neurons: every lhs dimension after the batch dimension, flattened
-  int k = 1;
-  for (int dim = 1 ; dim < lhs->dims.num_dims; dim++){
-    k = k * lhs->dims.dim_sizes[dim];
-  }
-
-  // The second-to-last rhs dimension must match the flattened lhs width
-  int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2];
-  INFO("m = %d, n = %d, k = %d \n", m, n, k);
-  if(rhs_k != k){
-    ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k);
-  }
-
-  // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines
-  Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1);
-  // The result is produced on the device, so mark the output as device-resident
-  changeTensorPlacement(output, DEVICE);
-
-  hostToDeviceCopy(lhs);
-  hostToDeviceCopy(rhs);
-
-  // NOTE: cuBlas uses column-major format
-  // NOTE: The leading dimension is the FIRST Dimension
-  // NOTE: The output is N * M in column-major format, M*N in row-major - what cuDNN expects
-  float* rhs_dev = (float*) rhs->gpu_data;
-  float* lhs_dev = (float*) lhs->gpu_data;
-  float* out_dev = (float*) output->gpu_data;
-  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N,
-                              n, m, k,
-                              &alpha,
-                              rhs_dev, k,
-                              lhs_dev, k,
-                              &beta,
-                              out_dev, n));
-
-  profileEvent("tensorGemm_end", true);
-
-  return output;
-}
-
-
-
-
-// FIXIT: Add dimension check assertions throughout the code
-//
-// Adds `bias_ptr` to every row of `input_ptr` in place and returns `input_ptr`.
-// The addition is expressed as a GEMM with beta = 1: the bias (n values) is
-// multiplied by a vector of m ones and accumulated into the input.
-void* tensorGemmBias(void* input_ptr, void* bias_ptr){
-
-  INFO("*** TensorGemmBias \n");
-  profileEvent("tensorGemmBias");
-
-  Tensor* input = (Tensor*) input_ptr;
-  Tensor* bias = (Tensor*) bias_ptr;
-
-  // C = alpha * A * B + beta * C; beta is 1 so the product is added to the input
-  float alpha = 1.0f;
-  float beta = 1.0f;
-
-  // Batch dimension - assuming NCHW format Tensors
-  int m = input->dims.dim_sizes[0];
-  // Output neurons: last dimension of the bias
-  int n = bias->dims.dim_sizes[bias->dims.num_dims - 1];
-
-  INFO("m = %d, n = %d \n", m, n);
-
-  hostToDeviceCopy(input);
-  hostToDeviceCopy(bias);
-
-  // Vector of m ones used to replicate the bias across the batch
-  struct Tensor* ones = (Tensor*) create2DTensor(CUDNN_DATA_FLOAT, m, 1);
-  fillOnes(ones);
-  hostToDeviceCopy(ones);
-
-  // NOTE: cuBlas uses column-major format
-  // NOTE: The leading dimension is just the FIRST Dimension
-  float* bias_dev = (float*) bias->gpu_data;
-  float* ones_dev = (float*) ones->gpu_data;
-  float* input_dev = (float*) input->gpu_data;
-  checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N,
-                              n, m, 1,
-                              &alpha,
-                              bias_dev, n,
-                              ones_dev, 1,
-                              &beta,
-                              input_dev, n));
-
-  profileEvent("tensorGemmBias_end", true);
-
-  return input;
-}
-
-
-// Applies cuDNN ReLU activation to `input_ptr` in place and returns it.
-// The tensor is copied to the device and converted to FP32 first. When
-// ERROR_INJECTION_ENABLED is defined, error is additionally injected into the
-// tensor according to the accuracy flag of the current operation.
-void* tensorRelu(void* input_ptr){
-
-  INFO("*** TensorRelu \n");
-  profileEvent("Relu");
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  cudnnActivationDescriptor_t reluDesc;
-  float alpha = 1.0f, beta = 0.0f;
-
-  hostToDeviceCopy(input);
-
-  convertToFP32(input);
-
-  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
-
-  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_RELU,
-                                          CUDNN_PROPAGATE_NAN, 0.0));
-
-  // Source and destination are the same tensor: the activation is in place
-  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
-                                    input->tensor_desc, input->gpu_data, &beta,
-                                    input->tensor_desc, input->gpu_data));
-
-  // The descriptor is local to this call; release it to avoid leaking one per invocation
-  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
-
-  profileEvent("Relu_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-
-  void* error_norms = tensorAddError(input, op_acc);
-  add_norms(error_norms, "tensorRelu", op_acc);
-  add_relu_overheads(input, op_acc);
-  op_counter++;
-  #endif
-
-  return input;
-}
-
-
-// Think: Should Softmax be broken into multiple IR operations?
-//
-// Applies cuDNN softmax (accurate algorithm, channel mode) to `input_ptr` in
-// place, copies the result back to the host and returns `input_ptr`.
-void* tensorSoftmax(void* input_ptr){
-
-  INFO("*** TensorSoftmax \n");
-  profileEvent("Softmax");
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  float alpha = 1.0f;
-  float beta = 0.0f;
-
-  hostToDeviceCopy(input);
-  convertToFP32(input);
-
-  // IMP: CUDNN_SOFTMAX_ACCURATE can be replaced with a less accurate CUDNN_SOFTMAX_FAST
-  // Source and destination are the same tensor: the softmax is in place
-  checkCUDNN(cudnnSoftmaxForward(cudnnHandle,
-                                 CUDNN_SOFTMAX_ACCURATE,
-                                 CUDNN_SOFTMAX_MODE_CHANNEL,
-                                 &alpha,
-                                 input->tensor_desc, input->gpu_data,
-                                 &beta,
-                                 input->tensor_desc, input->gpu_data));
-
-  deviceToHostCopy(input);
-  profileEvent("Softmax_end", true);
-
-  return input;
-}
-
-
-
-// Kernel: clamps each of the first `n` elements of `A` to [min, max], one
-// element per thread. The lower bound is applied first, then the upper bound.
-__global__ void clipValues(float* A, float min, float max, int n){
-
-  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // Threads past the end of the array have nothing to do
-  if(idx >= n)
-    return;
-
-  const float lower_bounded = fmaxf(min, A[idx]);
-  A[idx] = fminf(max, lower_bounded);
-}
-
-
-
-// Applies cuDNN clipped ReLU activation to `input_ptr` in place and returns it.
-// `max` is passed to cuDNN as the activation coefficient (per the cuDNN
-// documentation, the clipping threshold of CUDNN_ACTIVATION_CLIPPED_RELU).
-// NOTE(review): `min` is accepted but not used by this routine - confirm
-// whether a lower bound other than the ReLU's own was intended.
-// When ERROR_INJECTION_ENABLED is defined, error is additionally injected into
-// the tensor according to the accuracy flag of the current operation.
-void* tensorRelu2(void* input_ptr, float min, float max){
-
-  INFO("*** TensorClippedRelu *** \n");
-  profileEvent("Relu");
-
-  (void) min; // see NOTE(review) above
-
-  cudnnActivationDescriptor_t reluDesc;
-  float alpha = 1.0f, beta = 0.0f;
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  hostToDeviceCopy(input);
-
-  convertToFP32(input);
-
-  checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc));
-
-  checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU,
-                                          CUDNN_PROPAGATE_NAN, max));
-
-  // Source and destination are the same tensor: the activation is in place
-  checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha,
-                                    input->tensor_desc, input->gpu_data, &beta,
-                                    input->tensor_desc, input->gpu_data));
-
-  // The descriptor is local to this call; release it to avoid leaking one per invocation
-  checkCUDNN(cudnnDestroyActivationDescriptor(reluDesc));
-
-  profileEvent("Relu_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-  void* error_norms = tensorAddError(input, op_acc);
-  add_norms(error_norms, "tensorClippedRelu", op_acc);
-  add_relu_overheads(input, op_acc);
-  op_counter++;
-  #endif
-
-  return input;
-}
-
-
-// Applies cuDNN tanh activation to `input_ptr` in place and returns it.
-// The tensor is copied to the device and converted to FP32 first. When
-// ERROR_INJECTION_ENABLED is defined, error is additionally injected into the
-// tensor according to the accuracy flag of the current operation.
-void* tensorTanh(void* input_ptr){
-
-  INFO("*** TensorTanh \n");
-  profileEvent("Tanh");
-
-  Tensor* input = (Tensor*) input_ptr;
-
-  cudnnActivationDescriptor_t tanhDesc;
-  float alpha = 1.0f, beta = 0.0f;
-
-  hostToDeviceCopy(input);
-
-  convertToFP32(input);
-
-  checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc));
-
-  checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH,
-                                          CUDNN_PROPAGATE_NAN, 0.0));
-
-  // Source and destination are the same tensor: the activation is in place
-  checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha,
-                                    input->tensor_desc, input->gpu_data, &beta,
-                                    input->tensor_desc, input->gpu_data));
-
-  // The descriptor is local to this call; release it to avoid leaking one per invocation
-  checkCUDNN(cudnnDestroyActivationDescriptor(tanhDesc));
-
-  profileEvent("Tanh_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-  void* error_norms = tensorAddError(input, op_acc);
-  add_norms(error_norms, "tensorTanh", op_acc);
-  add_relu_overheads(input, op_acc);
-  op_counter++;
-  #endif
-
-  return input;
-}
-
-
-
-
-// Applies cuDNN inference-mode spatial batch normalization to `input_ptr` in
-// place and returns it. `gamma_ptr`, `beta_ptr`, `mean_ptr` and `variance_ptr`
-// supply the scale, bias, mean and variance tensors; `epsilon` is forwarded
-// unchanged to cuDNN. All five tensors are copied to the device; only the
-// input is converted to FP32. When ERROR_INJECTION_ENABLED is defined, error
-// is additionally injected into the tensor.
-void* tensorBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr,
-                      void* mean_ptr, void* variance_ptr, double epsilon){
-
-  INFO("*** TensorBatchNorm \n");
-  profileEvent("BatchNorm");
-
-  Tensor* input = (Tensor*) input_ptr;
-  Tensor* gamma = (Tensor*) gamma_ptr;
-  Tensor* beta = (Tensor*) beta_ptr;
-  Tensor* mean = (Tensor*) mean_ptr;
-  Tensor* variance = (Tensor*) variance_ptr;
-
-  const bool all_present = input != NULL && gamma != NULL && beta != NULL &&
-                           mean != NULL && variance != NULL;
-  if (!all_present){
-    ERROR("NULL Input Tensor");
-  }
-
-  // Blending factors for the cuDNN call
-  float alpha_val = 1.0f;
-  float beta_val = 0.0f;
-
-  hostToDeviceCopy(input);
-  hostToDeviceCopy(gamma);
-  hostToDeviceCopy(beta);
-  hostToDeviceCopy(mean);
-  hostToDeviceCopy(variance);
-
-  convertToFP32(input);
-
-  // Source and destination are the same tensor: the normalization is in place.
-  // The gamma descriptor is the only one passed for the parameter tensors.
-  checkCUDNN(cudnnBatchNormalizationForwardInference(cudnnHandle,
-                                                     CUDNN_BATCHNORM_SPATIAL,
-                                                     &alpha_val, &beta_val,
-                                                     input->tensor_desc, input->gpu_data,
-                                                     input->tensor_desc, input->gpu_data,
-                                                     gamma->tensor_desc,
-                                                     gamma->gpu_data,
-                                                     beta->gpu_data,
-                                                     mean->gpu_data,
-                                                     variance->gpu_data,
-                                                     epsilon));
-
-  profileEvent("BatchNorm_end", true);
-
-  #ifdef ERROR_INJECTION_ENABLED
-
-  if(op_counter >= total_ops){
-    ERROR("No accuracy flag found \n");
-  }
-
-  int op_acc = op_accuracies[op_counter];
-  void* error_norms = tensorAddError(input, op_acc);
-  add_norms(error_norms, "tensorBatchNorm", op_acc);
-  add_relu_overheads(input, op_acc);
-  op_counter++;
-  #endif
-
-  return input;
-}
-
-
-
-
-/************* GPU Layer API  *************/
-
-// GPU convolution layer: convolution, then optional bias add, optional
-// activation and optional pooling, in that order.
-//
-// input, filter    operands forwarded to tensorConvolution.
-// bias             added via tensorAdd when non-NULL; skipped when NULL.
-// conv_pad_h/w, conv_stride_h/w
-//                  forwarded unchanged to tensorConvolution.
-// pool_id          not consulted here: the pooling call always passes 0 as its
-//                  pooling-function argument (see FIXME below).
-// pool_size        used as both window and stride of the pooling step; pooling
-//                  is skipped when pool_size <= 0.
-// activation_id    -1: none, 0: tensorTanh, 1: tensorRelu, 2: tensorRelu2.
-// out_min/out_max  forwarded to tensorRelu2 (activation_id == 2 only).
-//
-// Returns the output of the last stage that ran.
-void* ConvLayer_GPU(void* input, 
-		    void* filter, 
-		    void* bias, 
-		    int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
-		    int pool_id, int pool_size,
-		    int activation_id, // Relu, Tanh, ClipRelu
-		    float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu'
-
-  void* conv_out = tensorConvolution(input, filter,
-				     conv_pad_h, conv_pad_w,
-				     conv_stride_h, conv_stride_w,
-				     1, 0);
-
-  // Bias is optional.
-  void* conv_add = conv_out;
-  if(bias != NULL){
-    conv_add = tensorAdd(conv_out, bias);
-  }
-
-  // Initialized so the unsupported-id path never reads an indeterminate
-  // pointer, whether or not ERROR returns.
-  void* activation_out = NULL;
-  switch(activation_id){
-  case -1:
-    activation_out = conv_add;
-    INFO("NO Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorTanh(conv_add);
-    break;
-  case 1:
-    activation_out = tensorRelu(conv_add);
-    break;
-  case 2:
-    activation_out = tensorRelu2(conv_add, out_min, out_max);
-    break;
-  default:
-    // Report the offending id; the original passed the (uninitialized)
-    // output pointer to the %d conversion.
-    ERROR("Activation id %d NOT supported \n", activation_id);
-    break;
-  }
-
-
-  void* pool_out = activation_out;
-  // NOTE: Skip pooling on non-positive pool sizes
-  if(pool_size > 0){
-    //FIXME: Currently only using MaxPooling
-    pool_out = tensorPooling(activation_out, 0, pool_size, pool_size, 0, 0, pool_size, pool_size);
-  }
-
-  return pool_out;
-}
-
-
-// GPU fully-connected layer: GEMM, then optional bias add and optional
-// activation, in that order.
-//
-// input, weights   operands forwarded to tensorGemmGPU.
-// bias             added via tensorAdd when non-NULL; skipped when NULL.
-// activation_id    -1: none, 0: tensorTanh, 1: tensorRelu, 2: tensorRelu2.
-// out_min/out_max  forwarded to tensorRelu2 (activation_id == 2 only).
-//
-// Returns the output of the last stage that ran.
-void* FCLayer_GPU(void* input, 
-		  void* weights, 
-		  void* bias, 
-		  int activation_id,
-		  float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu'
-
-  void* gemm_out = tensorGemmGPU(input, weights);
-
-  // Bias is optional.
-  void* gemmbias_out = gemm_out;
-  if(bias != NULL){
-    gemmbias_out = tensorAdd(gemm_out, bias);
-  }
-
-  // Initialized so the unsupported-id path never returns an indeterminate
-  // pointer, whether or not ERROR returns.
-  void* activation_out = NULL;
-  switch(activation_id){
-
-  case -1:
-    activation_out = gemmbias_out;
-    INFO("No Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorTanh(gemmbias_out);
-    break;
-  case 1:
-    activation_out = tensorRelu(gemmbias_out);
-    break;
-  case 2:
-    activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
-    break;
-  default:
-    // Report the offending id; the original passed the (uninitialized)
-    // output pointer to the %d conversion.
-    ERROR("Activation id %d NOT supported \n", activation_id);
-    break;
-  }
-
-  return activation_out;
-}
-
-
-/*********** PROMISE API **************/
-
-/*
-void* ConvLayer_PROMISE(void* input, float i_min, float i_max,
-			void* filter, float w_min, float w_max,
-			void* bias, float b_min, float b_max,
-			int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w,
-			int pool_id, int pool_size,
-			int activation_id, // Relu, Tanh, ClipRelu
-			float out_min, float out_max, int swing){ 
-
-  
-  #ifdef PROMISE_TUNER_ENABLED
-
-  // NOTE: Skip reading file-based error levels for ApproxHPVM wrapper runtime
-  if(!approxhpvm_runtime_mode){
-  
-    if(op_counter >= total_ops){
-      ERROR("No accuracy flag found \n");
-    }
-  
-    swing = op_accuracies[op_counter];
-    op_counter++;
-  }
-  
-  #endif  
-
-  
-  if (swing < 0 || swing > 20){
-    ERROR("Incorrect swing value");
-  }
-
-  
-
-  if(swing < 8){
-    input = quantizeTensorPromise(input, i_min, i_max);
-    filter = quantizeTensorPromise(filter, w_min, w_max);
-    if(bias != NULL)
-      bias = quantizeTensorPromise(bias, b_min, b_max);
-    // aRead error
-    
-    input = addPromiseError(input, swing);
-  }
-
-  
-  void* conv_out;
-  if(swing == 8 || (swing >= 12 && swing <= 15) ){
-    //conv_out = tensorConvPerf(input, filter, conv_pad_h, conv_pad_w,
-    //		              conv_stride_h, conv_stride_w, 1, 1, 1, 0);
-
-    int rows = 2;
-    switch(swing){
-
-    case 12: rows = 5; break;
-    case 13: rows = 4; break;
-    case 14: rows = 3; break;
-    case 15: rows = 2; break;    
-		   
-    default: rows = 2; break;
-    }
-    
-    conv_out = tensorConvPerf2(input, filter, conv_pad_h, conv_pad_w,
-    		              conv_stride_h, conv_stride_w, 1, 1, rows, 0);
-
-    /*void* gold = tensorConvolution(input, filter,
-				   conv_pad_h, conv_pad_w,
-				   conv_stride_h, conv_stride_w,
-				   1, 0);
-
-    Norm_t* norms = calculateNormsTreeReduction((struct Tensor*) conv_out, (struct Tensor*) gold);
-
-    DEBUG("\n-------- l2_norm = %f \n", norms->l2_norm); 
-    */
-
-
-
-  /* -----
-  }
-  else if(swing == 9 || (swing >= 16 && swing <= 19) ){
-    //conv_out = tensorConvPerf(input, filter, conv_pad_h, conv_pad_w,
-    //		              conv_stride_h, conv_stride_w, 1, 1, 0, 1);
-
-
-    int cols = 2;
-    switch(swing){
-
-    case 16: cols = 5; break;
-    case 17: cols = 4; break;
-    case 18: cols = 3; break;
-    case 19: cols = 2; break;    
-		   
-    default: cols = 2; break;
-    }
-
-    
-    conv_out = tensorConvPerf2(input, filter, conv_pad_h, conv_pad_w,
-    		              conv_stride_h, conv_stride_w, 1, 1, 0, cols);
-
-
-    /*void* gold = tensorConvolution(input, filter,
-				   conv_pad_h, conv_pad_w,
-				   conv_stride_h, conv_stride_w,
-				   1, 0);
-
-    Norm_t* norms = calculateNormsTreeReduction((struct Tensor*)conv_out, (struct Tensor*) gold);
-
-    DEBUG("\n-------- l2_norm = %f \n", norms->l2_norm); 
-    */
-
-  /*------
-  }
-  else if(swing == 10){  
-    conv_out = tensorHalfConvolution(input, filter,
-				     conv_pad_h, conv_pad_w,
-				     conv_stride_h, conv_stride_w,
-				     1, 0);
-  }
-  else{
-    conv_out = tensorConvolution(input, filter,
-				 conv_pad_h, conv_pad_w,
-				 conv_stride_h, conv_stride_w,
-				 1, 0);
-  }
-  
-  void* conv_add;
-  if(bias != NULL){
-    if(swing >= 8){  
-      conv_add = tensorHalfAdd(conv_out, bias);
-    }
-    else{
-      conv_add = tensorAdd(conv_out, bias);
-    }
-  }
-  else{
-    conv_add = conv_out;
-  }
-
-  void* pool_out;
-  // NOTE: Skip pooling on negative pool sizes
-  if(pool_size > 0){
-    //FIXME: Currently only using MaxPooling
-    pool_out = tensorHalfPooling(conv_add, 0, pool_size, pool_size, 0, 0, pool_size, pool_size);
-  }
-  else{
-    pool_out = conv_add;
-  }
-  
-  void* activation_out;  
-  switch(activation_id){
-  case -1:
-    activation_out = pool_out;
-    INFO("NO Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorHalfTanh(pool_out);
-    break;
-  case 1:
-    activation_out = tensorHalfRelu(pool_out);
-    break;
-  case 2:
-    activation_out = tensorHalfRelu2(pool_out, out_min, out_max);
-    break;
-  default:
-    ERROR("Activation id %d NOT supported \n", activation_out);
-    break;
-  }
-
-
-  if(swing < 8 && activation_id != -1){
-    activation_out = quantizeTensorPromise(activation_out, out_min, out_max);
-  }
-  
-  return activation_out;
-}
-
-
-void* FCLayer_PROMISE(void* input, float i_min, float i_max,
-		      void* weights, float w_min, float w_max,
-		      void* bias, float b_min, float b_max,
-		      int activation_id,
-		      float out_min, float out_max, int swing){ //NOTE: min_val, max_val apply to 'ClippedRelu'
-
-
-  
-  #ifdef PROMISE_TUNER_ENABLED
-
-  // NOTE: Skip reading file-based error levels for ApproxHPVM wrapper runtime
-  if(!approxhpvm_runtime_mode){
-
-    if(op_counter >= total_ops){
-      ERROR("No accuracy flag found \n");
-    }
-  
-    swing = op_accuracies[op_counter];
-    op_counter++;
-  }
-  
-  #endif
- 
-  
-  if (swing < 0 || swing > 20){
-    ERROR("Incorrect swing value");
-  }
-  
-  if(swing < 8){
-    input = quantizeTensorPromise(input, i_min, i_max);
-    weights = quantizeTensorPromise(weights, w_min, w_max);
-    if(bias != NULL)
-      bias = quantizeTensorPromise(bias, b_min, b_max);
-
-    // NOTE: Modelling aRead error in PROMISE
-    input = addPromiseError(input, swing);
-  }
-
-
-  
-  void* gemm_out;
-  if(swing >= 8 && swing < 11){
-    gemm_out = tensorHalfGemm(input, weights);
-  }
-  else{
-    gemm_out = tensorGemmGPU(input, weights);
-  }
-
-  
-  void* gemmbias_out;
-  if(bias != NULL){
-    // Swing 8 corresponds to FP32
-    if(swing >= 8 && swing < 20){
-      gemmbias_out = tensorHalfAdd(gemm_out, bias);
-    }
-    else{
-      gemmbias_out = tensorAdd(gemm_out, bias);
-    }
-  }
-  else{
-    gemmbias_out = gemm_out;
-  }
- 
-  void* activation_out;
-  switch(activation_id){
-
-  case -1:
-    activation_out = gemmbias_out;
-    INFO("No Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorTanh(gemmbias_out);
-    break;
-  case 1:
-    activation_out = tensorRelu(gemmbias_out);
-    break;
-  case 2:
-    activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
-    break;
-  default:
-    ERROR("Activation id %d NOT supported \n", activation_out);
-    break;
-  }
-  
-  
-  if(swing < 8 && activation_id != -1){
-    activation_out = quantizeTensorPromise(activation_out, out_min, out_max);
-  }
-  
-  return activation_out;
-}
-
-*****/
-
-
-
-/**** Wrapper Runtime API ***/
-  
-// Configuration-driven convolution layer.
-//
-// Looks up the node configuration registered under hpvm_node_id and runs the
-// layer either through the PROMISE path or through the GPU path:
-//   * PROMISE configuration: exactly one approximation tuple of kind
-//     SWING_LEVEL is accepted. Time/energy returned by RC->conv_profile are
-//     recorded, then PROMISE_Conv is called with the eight quantization range
-//     values QRanges[0..7]. Any other tuple count or kind reports an error and
-//     aborts.
-//   * GPU configuration: convolution, optional bias add (bias != NULL),
-//     activation selected by activation_id (-1 none, 0 tanh, 1 relu,
-//     2 clipped relu) and, when pool_size > 0, pooling selected by pool_id
-//     (0 max, 1 mean, 2 min). Each stage is dispatched through the matching
-//     handleTensor*ApproximationTuples helper with that stage's entry of the
-//     configuration.
-//   * Anything else reports an error and aborts.
-//
-// Returns the output of the last stage that ran.
-void* wrapper_ConvLayer(const char* hpvm_node_id,
-			void* input, 
-		        void* filter, 
-		        void* bias, 
-		        int conv_pad_h, int conv_pad_w,
-		        int conv_stride_h, int conv_stride_w,
-		        int pool_id, int pool_size,
-		        int activation_id,
-		        // NOTE: out_min, out_max are only relevant for ClippedRelu
-		        float out_min, float out_max){
-
-  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
-
-  if (NodeConf->isPROMISENodeConfiguration()) {
-    DEBUG("PROMISE Configuration for ConvLayer\n");
-    // Mapped to PROMISE - get a PROMISE node configuration
-    PROMISENodeConfiguration *PROMISEConf = (PROMISENodeConfiguration *)NodeConf;
-    std::vector<float> &QRanges = RC->getQuantizationRanges(hpvm_node_id);
-
-    std::vector<std::pair<PROMISENodeConfiguration::APPROX, int> > &approxTuples =
-      PROMISEConf->getApproxChoices();
-
-    if (approxTuples.size() == 1) {
-      enum PROMISENodeConfiguration::APPROX approx = approxTuples[0].first;
-      int param = approxTuples[0].second;
-      if (approx == PROMISENodeConfiguration::APPROX::SWING_LEVEL) {
-        DEBUG("Approximation choice for ConvLayer: swing level %d\n", param);
-
-        // Collect the tensor dimensions needed by the time/energy profile.
-        struct Tensor* input_tensor_cast = (struct Tensor*) input;
-        struct Tensor* filter_tensor_cast = (struct Tensor*) filter;
-        std::pair<double, double> pinfo =
-          RC->conv_profile(input_tensor_cast->dims.dim_sizes[0], //n
-                           input_tensor_cast->dims.dim_sizes[1], //c
-                           input_tensor_cast->dims.dim_sizes[2], //h
-                           input_tensor_cast->dims.dim_sizes[3], //w
-                           filter_tensor_cast->dims.dim_sizes[0], //c_out
-                           filter_tensor_cast->dims.dim_sizes[1], //c_in
-                           filter_tensor_cast->dims.dim_sizes[2], //k_h
-                           filter_tensor_cast->dims.dim_sizes[3], //k_w
-                           conv_stride_h, //s_h
-                           conv_stride_w, //s_w
-                           param, //voltage_swing
-                           filter_tensor_cast->dims.dim_sizes[2] *
-                             filter_tensor_cast->dims.dim_sizes[3] /*patch_factor: k_h*k_w*/);
-        // pinfo.first is recorded as compute time, pinfo.second as energy.
-        RC->addToCurrentIterationComputeTime("ConvLayer_PROMISE", pinfo.first);
-        RC->addToCurrentIterationComputeEnergy("ConvLayer_PROMISE", pinfo.second);
-        void* t_out;
-        // QRanges supplies one (min, max) pair each for input, filter, bias
-        // and output, in that order.
-        t_out = PROMISE_Conv(input, QRanges[0], QRanges[1],
-                            filter, QRanges[2], QRanges[3],
-                            bias, QRanges[4], QRanges[5],
-                            conv_pad_h, conv_pad_w,
-                            conv_stride_h, conv_stride_w,
-                            pool_id, pool_size,
-                            activation_id,
-                            QRanges[6], QRanges[7], param);
-
-        return t_out;
-      } else {
-        CUSTOM_ASSERT(false && "Unknown approximation type");
-        ERROR("Unknown approximation type");
-        abort();
-      }
-      // TODO additional approx methods implemented here
-
-    } else if (approxTuples.size() == 2) {
-      ERROR("Currently unsupported case");
-      abort();
-    } else {
-      ERROR("Unsupported case");
-      abort();
-    }
-  }
-  else
-  if (NodeConf->isGPUNodeConfiguration()) {
-    DEBUG("GPU Configuration for ConvLayer\n");
-    // Mapped to GPU - get a GPU node configuration
-    GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
-
-    // One entry per tensor operation of the layer; each entry carries that
-    // operation's (APPROX, int) tuples.
-    std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP,
-                            std::vector< std::pair<GPUNodeConfiguration::APPROX,
-                                                   int> > > > &ApproxChoices =
-      GPUConf->getApproxChoices();
-
-    // Check for convolution as first operation
-    CUSTOM_ASSERT((ApproxChoices.size() >= 1) &&
-           (ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::CONV) &&
-           "Incorrect number/type of operations in provided Conv layer configuration");
-
-    void* conv_out = handleTensorConvApproximationTuples(ApproxChoices[0].second,
-                       input, filter, conv_pad_h, conv_pad_w,
-                       conv_stride_h, conv_stride_w);
-    void* add_out;
-    if (bias != NULL) {
-      // Check for add as second operation
-      CUSTOM_ASSERT((ApproxChoices.size() >= 2) &&
-             (ApproxChoices[1].first == GPUNodeConfiguration::TENSOR_OP::ADD) &&
-             "Incorrect number/type of operations in provided Conv layer configuration");
-      add_out = handleTensorAddApproximationTuples(ApproxChoices[1].second,
-                                                   conv_out, bias);
-    } else {
-      add_out = conv_out;
-    }
-
-    // NOTE(review): the activation entry is always read from index 2, even
-    // when bias is NULL and no ADD was executed - confirm that configurations
-    // for bias-less layers still contain an entry at index 1.
-    void* activation_out;
-    switch (activation_id) {
-      case -1:
-        { // No activation
-          INFO("No activation Function\n");
-          activation_out = add_out;
-        }
-        break;
-      case 0:
-        { // TanH activation
-          CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::TANH) &&
-                 "Incorrect number/type of operations in provided Conv layer configuration");
-          activation_out = handleTensorTanhApproximationTuples(ApproxChoices[2].second,
-                                                               add_out);
-        }
-        break;
-      case 1:
-        { // ReLU activation
-          CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::RELU) &&
-                 "Incorrect number/type of operations in provided Conv layer configuration");
-          activation_out = handleTensorReluApproximationTuples(ApproxChoices[2].second,
-                                                               add_out);
-        }
-        break;
-      case 2:
-        { // Clipped ReLU activation
-          CUSTOM_ASSERT((ApproxChoices.size() >= 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) &&
-                 "Incorrect number/type of operations in provided Conv layer configuration");
-          activation_out =
-            handleTensorClippedReluApproximationTuples(ApproxChoices[2].second,
-                                                       add_out, out_min, out_max);
-        }
-        break;
-      default:
-        {
-          // NOTE(review): activation_out stays unassigned on this path; this
-          // is only safe if ERROR does not return - confirm.
-          ERROR("Activation id %d NOT supported \n", activation_id);
-        }
-        break;
-    }
-
-    void* pool_out;
-
-    // Pooling is the last configured operation, so it is read from
-    // ApproxChoices.back(). pool_size is used as both window and stride.
-    if (pool_size > 0) {
-      switch (pool_id) {
-        case 0:
-          {
-            // If we remove the asserts, we can have all cases handled by a single call
-            CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MAX) &&
-                  "Expected POOL_MAX in provided Conv layer configuration");
-            pool_out =
-              handleTensorPoolingApproximationTuples(ApproxChoices.back().second,
-                                                     activation_out, pool_id,
-                                                     pool_size, pool_size, 0, 0,
-                                                     pool_size, pool_size);
-          }
-          break;
-        case 1:
-          {
-            CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN) &&
-                  "Expected POOL_MEAN in provided Conv layer configuration");
-            pool_out =
-              handleTensorPoolingApproximationTuples(ApproxChoices.back().second,
-                                                     activation_out, pool_id,
-                                                     pool_size, pool_size, 0, 0,
-                                                     pool_size, pool_size);
-          }
-          break;
-        case 2:
-          {
-            CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
-                  "Expected POOL_MIN in provided Conv layer configuration");
-            pool_out =
-              handleTensorPoolingApproximationTuples(ApproxChoices.back().second,
-                                                     activation_out, pool_id,
-                                                     pool_size, pool_size, 0, 0,
-                                                     pool_size, pool_size);
-          }
-          break;
-        default:
-          {
-            // NOTE(review): pool_out stays unassigned on this path; this is
-            // only safe if ERROR does not return - confirm.
-            ERROR("Pool id %d NOT supported \n", pool_id);
-          }
-          break;
-      }
-    } else {
-      pool_out = activation_out;
-    }
-    return pool_out;
-  }
-  else {
-    ERROR("Unsupported Configuration");
-    abort();
-  }
-
-  // Not reached: every branch above returns or aborts.
-  return NULL;
-}
-
-
-// Configuration-driven fully-connected layer.
-//
-// Looks up the node configuration registered under hpvm_node_id and runs the
-// layer either through the PROMISE path or through the GPU path:
-//   * PROMISE configuration: exactly one approximation tuple of kind
-//     SWING_LEVEL is accepted. Time/energy returned by RC->fc_profile are
-//     recorded, then PROMISE_FC is called with the eight quantization range
-//     values QRanges[0..7]. Any other tuple count or kind reports an error and
-//     aborts.
-//   * GPU configuration: the configuration must hold MUL, ADD and, when an
-//     activation is requested, the activation as a third entry. Each stage is
-//     dispatched through the matching handleTensor*ApproximationTuples helper
-//     with that stage's own entry of the configuration.
-//   * Anything else reports an error and aborts.
-//
-// activation_id: -1 none, 0 tanh, 1 relu, 2 clipped relu.
-// out_min/out_max are only forwarded for clipped relu.
-//
-// Returns the output of the last stage that ran.
-void* wrapper_FCLayer(const char* hpvm_node_id,
-		      void* input, 
-		      void* weights, 
-		      void* bias, 
-		      int activation_id,
-		      // NOTE: out_min and out_max are only relevant for ClippedRelu
-		      float out_min, float out_max){ 
-
-  NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id);
-
-  if (NodeConf->isPROMISENodeConfiguration()) {
-    DEBUG("PROMISE Configuration for FCLayer\n");
-    // Mapped to PROMISE - get a PROMISE node configuration
-    PROMISENodeConfiguration *PROMISEConf = (PROMISENodeConfiguration *)NodeConf;
-    std::vector<float> &QRanges = RC->getQuantizationRanges(hpvm_node_id);
-
-    std::vector<std::pair<PROMISENodeConfiguration::APPROX, int> > &approxTuples =
-      PROMISEConf->getApproxChoices();
-
-    if (approxTuples.size() == 1) {
-      enum PROMISENodeConfiguration::APPROX approx = approxTuples[0].first;
-      int param = approxTuples[0].second;
-      if (approx == PROMISENodeConfiguration::APPROX::SWING_LEVEL) {
-        DEBUG("Approximation choice for FCLayer: swing level %d\n", param);
-
-        struct Tensor* input_tensor_cast = (struct Tensor*) input;
-        struct Tensor* weights_tensor_cast = (struct Tensor*) weights;
-        // The input is treated as a matrix whose column count is the product
-        // of its dimensions 1..3; that must match the weights' row count.
-        CUSTOM_ASSERT((input_tensor_cast->dims.dim_sizes[1] *
-                       input_tensor_cast->dims.dim_sizes[2] *
-                       input_tensor_cast->dims.dim_sizes[3] ==
-                         weights_tensor_cast->dims.dim_sizes[2]) &&
-                      "Dimensions for matrix multiplication do not match.");
-        std::pair<double, double> pinfo =
-          RC->fc_profile(input_tensor_cast->dims.dim_sizes[0], //num_rows_a,
-                         input_tensor_cast->dims.dim_sizes[1] *
-                           input_tensor_cast->dims.dim_sizes[2] *
-                           input_tensor_cast->dims.dim_sizes[3], //num_cols_a,
-                         weights_tensor_cast->dims.dim_sizes[2], //num_rows_b,
-                         weights_tensor_cast->dims.dim_sizes[3], //num_cols_b,
-                         param, //voltage_swing,
-                         1 /*patch_factor*/);
-        // pinfo.first is recorded as compute time, pinfo.second as energy.
-        RC->addToCurrentIterationComputeTime("FCLayer_PROMISE", pinfo.first);
-        RC->addToCurrentIterationComputeEnergy("FCLayer_PROMISE", pinfo.second);
-        void* t_out;
-        // QRanges supplies one (min, max) pair each for input, weights, bias
-        // and output, in that order.
-        t_out = PROMISE_FC(input, QRanges[0], QRanges[1],
-                           weights, QRanges[2], QRanges[3],
-                           bias, QRanges[4], QRanges[5],
-                           activation_id,
-                           QRanges[6], QRanges[7], param);
-        return t_out;
-      } else {
-        CUSTOM_ASSERT(false && "Unknown approximation type");
-        ERROR("Unknown approximation type");
-        abort();
-      }
-      // TODO additional approx methods implemented here
-
-    } else if (approxTuples.size() == 2) {
-      ERROR("Currently unsupported case");
-      abort();
-    } else {
-      ERROR("Unsupported case");
-      abort();
-    }
-  }
-  else
-  if (NodeConf->isGPUNodeConfiguration()) {
-    DEBUG("GPU Configuration for FCLayer\n");
-    // Mapped to GPU - get a GPU node configuration
-    GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf;
-
-    // One entry per tensor operation of the layer; each entry carries that
-    // operation's (APPROX, int) tuples.
-    std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP,
-                            std::vector< std::pair<GPUNodeConfiguration::APPROX,
-                                                   int> > > > &ApproxChoices =
-      GPUConf->getApproxChoices();
-
-    // Approximation choices must be for a FC wrapper operation
-    CUSTOM_ASSERT((ApproxChoices.size() == 2 || ApproxChoices.size() == 3) &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::MUL &&
-         ApproxChoices[1].first == GPUNodeConfiguration::TENSOR_OP::ADD &&
-         "Invalid configuration generated for FC layer wrapper operation");
-
-    void* gemm_out = handleTensorMulApproximationTuples(ApproxChoices[0].second,
-                                                        input, weights);
-    void* add_out = handleTensorAddApproximationTuples(ApproxChoices[1].second,
-                                                        gemm_out, bias);
-
-    // The activation is the third configured operation. Each case first
-    // asserts that entry 2 has the expected kind and then dispatches with
-    // entry 2's own tuples (the original passed entry 1, i.e. the ADD tuples,
-    // which disagreed with the assert and with wrapper_ConvLayer).
-    void* activation_out;
-    switch (activation_id) {
-      case -1:
-        { // No activation
-          CUSTOM_ASSERT((ApproxChoices.size() == 2) &&
-                 "Incorrect number of operations in provided FC layer configuration");
-          INFO("No activation Function\n");
-          activation_out = add_out;
-        }
-        break;
-      case 0:
-        { // TanH activation
-          CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::TANH) &&
-                 "Incorrect number/type of operations in provided FC layer configuration");
-          activation_out = handleTensorTanhApproximationTuples(ApproxChoices[2].second,
-                                                               add_out);
-        }
-        break;
-      case 1:
-        { // ReLU activation
-          CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::RELU) &&
-                 "Incorrect number/type of operations in provided FC layer configuration");
-          activation_out = handleTensorReluApproximationTuples(ApproxChoices[2].second,
-                                                               add_out);
-        }
-        break;
-      case 2:
-        { // Clipped ReLU activation
-          CUSTOM_ASSERT((ApproxChoices.size() == 3) &&
-                 (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) &&
-                 "Incorrect number/type of operations in provided FC layer configuration");
-          activation_out =
-            handleTensorClippedReluApproximationTuples(ApproxChoices[2].second,
-                                                       add_out, out_min, out_max);
-        }
-        break;
-      default:
-        {
-          // NOTE(review): activation_out stays unassigned on this path; this
-          // is only safe if ERROR does not return - confirm.
-          ERROR("Activation id %d NOT supported \n", activation_id);
-        }
-        break;
-    }
-    return activation_out;
-  }
-  else {
-    ERROR("Unsupported Configuration");
-    abort();
-  }
-
-  // Not reached: every branch above returns or aborts.
-  return NULL;
-}
-
-
-
-
-// Configuration-driven ReLU: fetches the configuration registered under
-// hpvm_node_id, checks that it describes exactly one RELU operation and
-// dispatches input_ptr through handleTensorReluApproximationTuples.
-void* wrapper_tensorRelu(const char* hpvm_node_id, void* input_ptr){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be RELU.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::RELU &&
-         "Invalid configuration generated for tensor relu wrapper operation");
-
-  void* relu_out =
-    handleTensorReluApproximationTuples(ApproxChoices[0].second, input_ptr);
-  return relu_out;
-}
-
-// Configuration-driven clipped ReLU: fetches the configuration registered
-// under hpvm_node_id, checks that it describes exactly one CLIPPED_RELU
-// operation and dispatches through handleTensorClippedReluApproximationTuples,
-// forwarding out_min and out_max unchanged.
-void* wrapper_tensorClippedRelu(const char* hpvm_node_id,
-                                void* input_ptr,
-                                float out_min, float out_max){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be CLIPPED_RELU.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU &&
-         "Invalid configuration generated for tensor clipped relu wrapper operation");
-
-  void* clipped_out =
-    handleTensorClippedReluApproximationTuples(ApproxChoices[0].second,
-                                               input_ptr, out_min, out_max);
-  return clipped_out;
-}
-
-// Configuration-driven tanh: fetches the configuration registered under
-// hpvm_node_id, checks that it describes exactly one TANH operation and
-// dispatches input_ptr through handleTensorTanhApproximationTuples.
-void* wrapper_tensorTanh(const char* hpvm_node_id, void* input_ptr){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be TANH.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::TANH &&
-         "Invalid configuration generated for tensor tanh wrapper operation");
-
-  void* tanh_out =
-    handleTensorTanhApproximationTuples(ApproxChoices[0].second, input_ptr);
-  return tanh_out;
-}
-
-
-// Configuration-driven batch normalization: fetches the configuration
-// registered under hpvm_node_id, checks that it describes exactly one
-// BATCHNORM operation and dispatches through
-// handleTensorBatchNormApproximationTuples, forwarding all tensor pointers
-// and epsilon unchanged.
-void* wrapper_tensorBatchNorm(const char* hpvm_node_id,
-                              void* input_ptr, void* gamma_ptr, void* beta_ptr,
-                              void* mean_ptr, void* variance_ptr, double epsilon){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be BATCHNORM.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::BATCHNORM &&
-         "Invalid configuration generated for tensor batchnorm wrapper operation");
-
-  void* bn_out =
-    handleTensorBatchNormApproximationTuples(ApproxChoices[0].second,
-                                             input_ptr, gamma_ptr, beta_ptr,
-                                             mean_ptr, variance_ptr, epsilon);
-  return bn_out;
-}
-
-
-// Configuration-driven add: fetches the configuration registered under
-// hpvm_node_id, checks that it describes exactly one ADD operation and
-// dispatches input_ptr and bias_ptr through handleTensorAddApproximationTuples.
-void* wrapper_tensorAdd(const char* hpvm_node_id, void* input_ptr, void* bias_ptr){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be ADD.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::ADD &&
-         "Invalid configuration generated for tensor add wrapper operation");
-
-  void* add_out =
-    handleTensorAddApproximationTuples(ApproxChoices[0].second,
-                                       input_ptr, bias_ptr);
-  return add_out;
-}
-
-
-// Configuration-driven pooling: fetches the configuration registered under
-// hpvm_node_id, checks that it describes exactly one operation of kind
-// POOL_MAX, POOL_MEAN or POOL_MIN and dispatches through
-// handleTensorPoolingApproximationTuples, forwarding poolFunction and the
-// window/pad/stride arguments unchanged.
-void* wrapper_tensorPooling(const char* hpvm_node_id,
-                            void* input_ptr,
-                            int poolFunction,
-                            int window_height, int window_width,
-                            int vertical_pad, int horizontal_pad,
-                            int vertical_stride, int horizontal_stride){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // First check the entry count, so that index 0 is known to exist ...
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-                "Invalid configuration generated for tensor pool wrapper operation");
-
-  // ... then check that the single entry is one of the pooling kinds.
-  enum GPUNodeConfiguration::TENSOR_OP top = ApproxChoices[0].first;
-  CUSTOM_ASSERT((top == GPUNodeConfiguration::TENSOR_OP::POOL_MAX  ||
-                 top == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN ||
-                 top == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) &&
-         "Invalid configuration generated for tensor pool wrapper operation");
-
-  void* pooled_out =
-    handleTensorPoolingApproximationTuples(ApproxChoices[0].second,
-                                           input_ptr, poolFunction,
-                                           window_height, window_width,
-                                           vertical_pad, horizontal_pad,
-                                           vertical_stride, horizontal_stride);
-  return pooled_out;
-}
-
-
-// Configuration-driven group convolution: fetches the configuration
-// registered under hpvm_node_id, checks that it describes exactly one
-// GROUP_CONV operation and dispatches through
-// handleTensorGroupConvApproximationTuples, forwarding the pad, stride,
-// conv_mode and conv_groups arguments unchanged.
-void* wrapper_tensorGroupConvolution(const char* hpvm_node_id,
-                                     void* input, void* filter,
-                                     int vertical_pad, int horizontal_pad,
-                                     int vertical_stride, int horizontal_stride,
-                                     int conv_mode, int conv_groups){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be GROUP_CONV.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::GROUP_CONV &&
-         "Invalid configuration generated for tensor group_conv wrapper operation");
-
-  void* group_conv_out =
-    handleTensorGroupConvApproximationTuples(ApproxChoices[0].second,
-                                             input, filter,
-                                             vertical_pad, horizontal_pad,
-                                             vertical_stride, horizontal_stride,
-                                             conv_mode, conv_groups);
-  return group_conv_out;
-}
-
-
-
-// Configuration-driven softmax: fetches the configuration registered under
-// hpvm_node_id, checks that it describes exactly one SOFTMAX operation and
-// dispatches input_ptr through handleTensorSoftmaxApproximationTuples.
-void* wrapper_tensorSoftmax(const char* hpvm_node_id, void* input_ptr){
-  // (APPROX, int) tuples attached to one tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::APPROX, int> > ApproxTupleList;
-  // One (operation, tuples) entry per configured tensor operation.
-  typedef std::vector< std::pair<GPUNodeConfiguration::TENSOR_OP,
-                                 ApproxTupleList> > OpChoiceList;
-
-  // The node's configuration is used as a GPU configuration without a kind check.
-  GPUNodeConfiguration *gpu_conf =
-    (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id);
-  OpChoiceList &ApproxChoices = gpu_conf->getApproxChoices();
-
-  // Exactly one operation is expected, and it has to be SOFTMAX.
-  CUSTOM_ASSERT(ApproxChoices.size() == 1 &&
-         ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::SOFTMAX &&
-         "Invalid configuration generated for tensor softmax wrapper operation");
-
-  void* softmax_out =
-    handleTensorSoftmaxApproximationTuples(ApproxChoices[0].second, input_ptr);
-  return softmax_out;
-}
-
-
-
diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu
deleted file mode 100644
index f6c4fff296debd5bd0f5c5287ee28824b00c1380..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu
+++ /dev/null
@@ -1,123 +0,0 @@
-
-#include <stdio.h>
-#include <cstdio>
-#include <cstdlib>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include <cuda_runtime.h>
-#include <device_launch_parameters.h>
-
-#include <cublas_v2.h>
-#include <cudnn.h>
-#include <cublas_api.h>
-#include <cuda_fp16.h>
-#include <driver_types.h>
-
-
-// Tensor runtime header files
-#include "../include/tensor_runtime.h"
-#include "../include/tensor_utils.h"
-#include "../include/debug.h"
-#include "../include/profiling.h"
-#include "../include/fp16_conversion.h"
-#include "../include/global_data.h"
-#include "../include/error.h"
-#include "../include/tensor.h"
-#include "../include/op_overheads.h"
-#include "../include/half_precision_api.h"
-
-
-
-/*********** Generic Layer API **************/
-
-// Runs one convolution layer as a fixed pipeline:
-//   tensorConvolution -> tensorAdd (bias) -> optional tensorPooling ->
-//   optional activation
-// and returns the handle produced by the last stage that ran.
-//
-// Parameters:
-//   input, filter, bias  - opaque tensor handles, forwarded unchanged to
-//                          tensorConvolution / tensorAdd.
-//   conv_pad_h/w,
-//   conv_stride_h/w      - forwarded to tensorConvolution.
-//   pool_id              - accepted for interface compatibility; not read here.
-//   pool_size            - pooling runs only when this is > 0.
-//   activation_id        - -1: none, 0: tensorTanh, 1: tensorRelu,
-//                          2: tensorRelu2 (clipped to [out_min, out_max]).
-//   out_min, out_max     - only used when activation_id == 2.
-//
-// Returns NULL when activation_id is not one of the values above and ERROR
-// returns control to the caller.
-void* ConvLayerWrapper(void* input,
-		void* filter,
-		void* bias,
-		int conv_pad_h, int conv_pad_w,
-		int conv_stride_h, int conv_stride_w,
-		int pool_id, int pool_size,
-		int activation_id,
-		// NOTE: out_min, out_max are only relevant for ClippedRelu
-		float out_min, float out_max){
-
-  // NOTE(review): the trailing 1, 0 are presumably the convolution mode and a
-  // precision/group selector - confirm against tensorConvolution's declaration.
-  void* conv_out = tensorConvolution(input, filter,
-				     conv_pad_h, conv_pad_w,
-				     conv_stride_h, conv_stride_w,
-				     1, 0);
-
-  void* conv_add = tensorAdd(conv_out, bias);
-
-  void* pool_out;
-  // NOTE: Skip pooling when pool size is not greater than 0
-  if(pool_size > 0){
-    //FIXME: Currently only using MaxPooling
-    pool_out = tensorPooling(conv_add, 0, pool_size, pool_size, 0, 0, pool_size, pool_size);
-  }
-  else{
-    pool_out = conv_add;
-  }
-
-  // Start from NULL so the unsupported-id path never returns an
-  // indeterminate pointer.
-  void* activation_out = NULL;
-  switch(activation_id){
-  case -1:
-    activation_out = pool_out;
-    INFO("NO Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorTanh(pool_out);
-    break;
-  case 1:
-    activation_out = tensorRelu(pool_out);
-    break;
-  case 2:
-    activation_out = tensorRelu2(pool_out, out_min, out_max);
-    break;
-  default:
-    // Report the offending id; the previous code passed the (uninitialized)
-    // output pointer to the %d conversion.
-    ERROR("Activation id %d NOT supported \n", activation_id);
-    break;
-  }
-
-  return activation_out;
-}
-
-
-// Runs one fully-connected layer as a fixed pipeline:
-//   tensorGemmGPU -> tensorAdd (bias) -> optional activation
-// and returns the handle produced by the last stage that ran.
-//
-// Parameters:
-//   input, weights, bias - opaque tensor handles, forwarded unchanged to
-//                          tensorGemmGPU / tensorAdd.
-//   activation_id        - -1: none, 0: tensorTanh, 1: tensorRelu,
-//                          2: tensorRelu2 (clipped to [out_min, out_max]).
-//   out_min, out_max     - only used when activation_id == 2.
-//
-// Returns NULL when activation_id is not one of the values above and ERROR
-// returns control to the caller.
-void* FCLayerWrapper(void* input,
-	      void* weights,
-	      void* bias,
-	      int activation_id,
-	      // NOTE: out_min and out_max are only relevant for ClippedRelu
-	      float out_min, float out_max){
-
-  void* gemm_out = tensorGemmGPU(input, weights);
-
-  void* gemmbias_out = tensorAdd(gemm_out, bias);
-
-  // Start from NULL so the unsupported-id path never returns an
-  // indeterminate pointer.
-  void* activation_out = NULL;
-  switch(activation_id){
-
-  case -1:
-    activation_out = gemmbias_out;
-    INFO("No Activation Function \n");
-    break;
-  case 0:
-    activation_out = tensorTanh(gemmbias_out);
-    break;
-  case 1:
-    activation_out = tensorRelu(gemmbias_out);
-    break;
-  case 2:
-    activation_out = tensorRelu2(gemmbias_out, out_min, out_max);
-    break;
-  default:
-    // Report the offending id; the previous code passed the (uninitialized)
-    // output pointer to the %d conversion.
-    ERROR("Activation id %d NOT supported \n", activation_id);
-    break;
-  }
-
-  return activation_out;
-}
-
-