diff --git a/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py b/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py deleted file mode 100644 index 6ff0e643f26e1120b70003f62d88a0a0160423ce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/legacy/driver.py +++ /dev/null @@ -1,505 +0,0 @@ -from collections import defaultdict -import os -import subprocess -import sys - -class Driver: - fp16_swing = 8 - - class PrecisionTypes: - FP16 = 0 - FP32 = 1 - PROMISE = 2 - - class ApproxTypes: - PERF = 3 - SAMP = 4 - REDUCE = 5 - FFT = 6 - - results_time_key = "Time" - results_energy_key = "Energy" - - - def __init__(self, layer_filename, table_filename, config_filename, results_filename): - self.__layer_filename = layer_filename - self.__table_filename = table_filename - self.__config_filename = config_filename - self.__results_filename = results_filename - - # NOTE: Use an OrderedDict if we want to search by operation name - # Using a list bc we care about the order the data is read in - # since it corresponds to the data in the configuration file - self.__tensor_layers = [] - - # [layer_name][operation_name][cols] - # Operation names need to be stored in order of insertion - self.__tensor_table = defaultdict(lambda: list(defaultdict(str))) - - self.__conf_results = [] # indexed - #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])} - - - @staticmethod - def is_conv(operation_name): - return operation_name.startswith("Conv") - - - @staticmethod - def is_nml(operation_name): - return operation_name.startswith("NML") - - - @staticmethod - def is_fc(operation_name): - return operation_name.startswith("FC") - - - # FOR DEBUGGING ONLY - def __get_str(self, appr): - if appr == Driver.PrecisionTypes.FP16: - return "FP16" - elif appr == Driver.PrecisionTypes.FP32: - return "FP32" - elif appr == Driver.PrecisionTypes.PROMISE: - return "PROMISE" - elif appr == Driver.ApproxTypes.PERF: - return "PERF" - elif appr == Driver.ApproxTypes.SAMP: - return "SAMP" - elif appr == Driver.ApproxTypes.REDUCE: return "REDUCE" - elif appr == Driver.ApproxTypes.FFT: return "FFT" - - def driver(self): - self.__parse_tensor_layer_file() - self.__parse_tensor_table() - self.__run_simulations() - self.__write_output() - - - def __parse_tensor_layer_file(self): - if self.__layer_filename == "NONE": return - if not os.path.isfile(self.__layer_filename): - print("ERROR: %s was not found." % self.__layer_filename) - exit(1) - layer_file = open(self.__layer_filename, "r") - for line in layer_file: - layer_data = line.strip().split(',') - layer_name = layer_data[0] - - tensor_layer = defaultdict(str) - tensor_layer["Name"] = layer_name - - if Driver.is_conv(layer_name): - tensor_layer["N"] = float(layer_data[1]) - tensor_layer["Cin"] = float(layer_data[2]) - tensor_layer["H"] = float(layer_data[3]) - tensor_layer["W"] = float(layer_data[4]) - tensor_layer["Cout"] = float(layer_data[5]) - tensor_layer["Kh"] = float(layer_data[7]) - tensor_layer["Kw"] = float(layer_data[8]) - tensor_layer["Sh"] = float(layer_data[9]) - tensor_layer["Sw"] = float(layer_data[10]) - - elif Driver.is_fc(layer_name): - tensor_layer["RA"] = float(layer_data[1]) - tensor_layer["CA"] = float(layer_data[2]) - tensor_layer["RB"] = float(layer_data[3]) - tensor_layer["CB"] = float(layer_data[4]) - - elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs? 
- print("ERROR: Invalid layer name %s" % layer_name) - exit(1) - - self.__tensor_layers.append(tensor_layer) - layer_file.close() - - - def __parse_tensor_table(self): - if not os.path.isfile(self.__table_filename): - print("ERROR: %s was not found." % self.__table_filename) - exit(1) - table_file = open(self.__table_filename, "r") - line = table_file.readline().strip() - - while line: - # Line here MUST be a header or there's a bug - # Get the description of the layer - assert(line.startswith("**")) - header_contents = line.split(' ')[1:] - layer_name = header_contents[0] - num_ops = int(header_contents[1]) - col_names = header_contents[2:] - - layer_operations = [] - - # Go through all operations in the layer - for op_count in range(num_ops): - operation_data = defaultdict(str) - - line = table_file.readline().strip() - op_data = line.split(' ') - op_name = op_data[0] - operation_data["Name"] = op_name - - # Number of data items (#s) needs to match up with the # of cols - assert(len(op_data) - 1 == len(col_names)) - - # Go through all data items (each col element) per operation - for i in range(len(col_names)): - operation_data[col_names[i]] = float(op_data[i + 1]) - - layer_operations.append(operation_data) - - self.__tensor_table[layer_name] = layer_operations - line = table_file.readline().strip() - table_file.close() - - - @staticmethod - def is_promise(layer_hardware): - return layer_hardware == "promise" - - @staticmethod - def is_gpu(layer_hardware): - return layer_hardware == "gpu" - - def is_fp32(self, line): - return line.find("fp32") != -1 or line.find("red_samp 41") != -1 or line.find("red_samp 43") != -1 or line.find("red_samp 45") != -1 - - def __run_simulations(self): - config_file = open(self.__config_filename, "r") - line = config_file.readline().strip() - - while line: - assert(line == "+++++") - print("CONFIGURATION") - - curr_conf_results = [] - - prev_layer = Driver.PrecisionTypes.FP32 - curr_layer = None - - line = config_file.readline().strip() - first_line = line - conf_name = line.split(' ')[0] - print("CONF NAME: %s" % conf_name) - assert(conf_name.startswith("conf")) - line = config_file.readline().strip() - - while line != "-----": - layer_as_lst = line.split(' ') - layer_results = [] - # Skip softmax - if line.find("softmax") != -1: - layer_results.append((0, 0, ' '.join(layer_as_lst[2:]))) - curr_conf_results.append((layer_as_lst[1], layer_results)) - line = config_file.readline().strip() - continue - layer_ind = int(layer_as_lst[0]) - 1 - layer_table_data = self.__tensor_layers[layer_ind] - layer_name = layer_table_data["Name"] - - if Driver.is_gpu(layer_as_lst[1]): - print("Running layer %s on the GPU" % layer_name) - - tensor_count = 0 - - # 3 elements per tensor operation - for i in range(2, len(layer_as_lst), 3): - op_type = layer_as_lst[i] # map2 fp32 1 - precision_type = layer_as_lst[i + 1] # or approx type - op_number = layer_as_lst[i + 2] - print(' '.join(layer_as_lst[i : i + 3])) - - approx_type = None - if self.is_fp32(line): #line.find("fp32") != -1 or line.find("red_samp 41") != -1 or line.find("red_samp 43") != -1 or line.find("red_samp 45") != -1: - curr_layer = Driver.PrecisionTypes.FP32 - print("FP32") - else: - curr_layer = Driver.PrecisionTypes.FP16 - print("FP16") - if precision_type == "perf" or precision_type == "samp" or precision_type == "red_samp": # Handle approx type - if precision_type == "perf": - approx_type = Driver.ApproxTypes.PERF - elif precision_type == "samp": - approx_type = Driver.ApproxTypes.SAMP - elif precision_type 
== "red_samp": - precision_type = "reduce" - approx_type = Driver.ApproxTypes.REDUCE - elif precision_type == "fft": - approx_type = Driver.ApproxTypes.FFT - - quant_time, quant_energy = 0, 0#self.__quantize(precision_type, op_number, curr_layer, prev_layer, tensor_count, layer_table_data) - if quant_time != 0: - assert i == 2 #and layer_ind == 0 - conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \ - tensor_count, approx_type, op_number) - print(quant_time, conv_time) - layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3]))) - prev_layer = curr_layer - tensor_count += 1 - - line = config_file.readline().strip() - prev_layer = curr_layer - curr_conf_results.append((layer_as_lst[1], layer_results)) - - if not self.__conf_results: # we're appending the baseline - # need to find the fp16 baseline - self.fp16_baseline = [] - - prev_layer = Driver.PrecisionTypes.FP32 - curr_layer = None - - has_quantized = False - for layer_ind, (hardware, layer) in enumerate(curr_conf_results): - if layer[0][2].find("softmax") != -1: continue - fp16_layer = [] - layer_table_data = self.__tensor_layers[layer_ind] - layer_name = layer_table_data["Name"] - - for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer): - curr_layer = Driver.PrecisionTypes.FP16 # always - - quant_time, quant_energy = self.__quantize("fp16", "1", curr_layer, prev_layer, tensor_ind, layer_table_data) - if quant_time != 0: - assert not has_quantized - has_quantized = True - tensor_info = self.__tensor_table[layer_name][tensor_ind] - fp16_time = tensor_info["fp16_time"] + quant_time - fp16_energy = tensor_info["fp16_energy"] + quant_energy - fp16_layer.append((fp16_time, fp16_energy, tensor_op.replace("fp32", "fp16"))) - prev_layer = curr_layer - - prev_layer = curr_layer - self.fp16_baseline.append((hardware, fp16_layer)) - self.__conf_results.append( (first_line, curr_conf_results) ) - line = config_file.readline().strip() - config_file.close() - - - def __quantize(self, precision_type, op_number, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): - if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \ - or prev_layer == Driver.PrecisionTypes.PROMISE: - return 0.0, 0.0 - layer_name = layer_data["Name"] - print("DATA: ", precision_type, op_number, self.__get_str(curr_layer), self.__get_str(prev_layer), h2f_f2h_operation_ind) - # NOTE: Ignoring logic where curr == promise or prev == promise bc - # smartDMA is always true so we'd return near the beginning of the method - - # Get h2f/f2h data using the first tensor operation in the layer - # (which is why order matters in the tensor table) - tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind] - time_key = None - energy_key = None - - print("prec type and op number:", precision_type, op_number) - if op_number == "1": lookup_key = "_" #lookup_key = precision_type - else: lookup_key = "_" + precision_type + str(op_number) + "_" - print(lookup_key) - if curr_layer == Driver.PrecisionTypes.FP32: - lookup_key = lookup_key.replace("41", "42").replace("43", "44").replace("45", "46") - time_key = "h2f%stime" % lookup_key - energy_key = "h2f%senergy" % lookup_key - elif curr_layer == Driver.PrecisionTypes.FP16: - time_key = "f2h%stime" % lookup_key - energy_key = "f2h%senergy" % lookup_key - - print("QUANTIZATION KEYS", time_key, energy_key) - time = tensor_op_row[time_key] - energy = tensor_op_row[energy_key] - #if not time: time = 0.0 - #if not energy: energy 
= 0.0 - print("QUANTIZATION TIME AND ENERGY", time, energy) - return (time, energy) - - - def __run_promise_simulation(self, swing, layer_data): - layer_name = layer_data["Name"] - patch_factor = 1 - - if Driver.is_conv(layer_name): - rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ - / (layer_data["Sh"] * layer_data["Sw"]) - cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] - rows_b = cols_a - cols_b = layer_data["Cout"] - patch_factor = layer_data["Kh"] * layer_data["Kw"] - elif Driver.is_fc(layer_name): - rows_a = layer_data["RA"] - cols_a = layer_data["CA"] - rows_b = layer_data["RB"] - cols_b = layer_data["CB"] - else: - print("PROMISE can't run whatever this layer is.") - exit(1) - # Run promise simulator - # TODO need to print time and energy in the ptm runner so we can pipe it - output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \ - str(cols_b), str(patch_factor), str(swing)], \ - stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] - total_time_energy = output.strip().split(',') - - assert(len(total_time_energy) == 2) - return float(total_time_energy[0]), float(total_time_energy[1]) - - - def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \ - approx_type = None, knob_number = None): - tensor_info = self.__tensor_table[layer_name][tensor_ind] - time_key = None - energy_key = None - - if approx_type == Driver.ApproxTypes.PERF or approx_type == Driver.ApproxTypes.SAMP or approx_type == Driver.ApproxTypes.REDUCE or approx_type == Driver.ApproxTypes.FFT: - approx_type_str = None - if approx_type == Driver.ApproxTypes.PERF: - approx_type_str = "perf" - elif approx_type == Driver.ApproxTypes.SAMP: - approx_type_str = "samp" - elif approx_type == Driver.ApproxTypes.REDUCE: - approx_type_str = "reduce" - elif approx_type == Driver.ApproxTypes.FFT: - approx_type_str = "reduce" - - if curr_layer == Driver.PrecisionTypes.FP32: - time_key = "fp32_%s%s_time" % (approx_type_str, knob_number) - energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number) - - elif curr_layer == Driver.PrecisionTypes.FP16: - time_key = "fp16_%s%s_time" % (approx_type_str, knob_number) - energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number) - - else: # None for now - if curr_layer == Driver.PrecisionTypes.FP32: - time_key = "fp32_time" - energy_key = "fp32_energy" - - elif curr_layer == Driver.PrecisionTypes.FP16: - time_key = "fp16_time" - energy_key = "fp16_energy" - print("GPU SIMULATION KEYS: ", time_key, energy_key) - conversion_time = tensor_info[time_key] - conversion_energy = tensor_info[energy_key] - print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy)) - return conversion_time, conversion_energy - - - def __write_output(self): - config_file = open(self.__config_filename, "r") - results_file = open(self.__results_filename, "w") - - def write_conf_to_file(conf_name, final_conf, time_speedup, energy_speedup): - # conf = [layer value if promise], [tensor vals if gpu]] - conf_str = ["+++++"] - - # process the first line - first_line, layers = final_conf - first_line_lst = first_line.split(' ') - assert first_line_lst[0] == conf_name - - new_header = [conf_name] - new_header.append(repr(time_speedup)) - new_header.append(repr(energy_speedup)) - new_header.append(repr(abs(float(first_line_lst[-2])))) - new_header.append(repr(abs(float(first_line_lst[-1])))) - conf_str.append(' '.join(new_header)) - - for ind, (hardware, layer) in enumerate(layers): - layer_lst = [str(ind + 1)] - 
layer_lst.append(hardware) - for op_time, op_energy, tensor_op in layer: - layer_lst.append(tensor_op) - conf_str.append(' '.join(layer_lst)) - conf_str.append("-----\n") - results_file.write('\n'.join(conf_str)) - - fp32_baseline_conf = None - baseline_total_time = baseline_total_energy = 0 - - def get_baseline_times_energies(conf): - curr_time = curr_energy = 0 - for hardware, layer in conf[1]: - for op_time, op_energy, tensor_op in layer: - curr_time += op_time - curr_energy += op_energy - return curr_time, curr_energy - - def get_final_times_energies_conf(curr_conf, curr_conf_name): - final_time = final_energy = 0 - - final_conf = [] # List (conf) of lists (layers) of tuples (operation data) - - for layer_ind, (hardware, layer) in enumerate(curr_conf[1]): - final_conf_layer = [] - - for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer): - if tensor_op.find("softmax") != -1: - final_conf_layer.append((None, None, tensor_op)) - continue - # is promise --> add everything in that layer - if tensor_op.find("promise") != -1: - assert False - else: # look at the individual tensor operation as before - if self.is_fp32(tensor_op): #tensor_op.find("fp32") != -1: - baseline_layer = fp32_baseline_conf[1][layer_ind][1] - print(baseline_layer) - print("FP32") - else: - baseline_layer = self.fp16_baseline[layer_ind][1] - print("FP16") - baseline_time = baseline_layer[tensor_ind][0] - baseline_energy = baseline_layer[tensor_ind][1] - baseline_op = baseline_layer[tensor_ind][2] - print("Baseline time: ", baseline_time) - print("Baseline energy: ", baseline_energy) - print("Baseline op: ", baseline_op) - final_tensor_op = tensor_op - if op_time > baseline_time: - print("**************** BIGGER ******************") - print("Curr conf name: ", curr_conf_name) - print("Baseline data: ", baseline_time, baseline_energy, baseline_op, layer_ind) - print("Curr conf data: ", op_time, tensor_op, layer_ind) - final_time += baseline_time - final_energy += baseline_energy - final_tensor_op = baseline_op - else: - #print("*************** NOT BIGGER ****************") - #print("Curr conf name: ", curr_conf_name) - #print("Baseline data: ", baseline_time, baseline_energy, baseline_op, layer_ind) - #print("Curr conf data: ", op_time, tensor_op, layer_ind) - final_time += op_time - final_energy += op_energy - print("\n") - final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing - final_conf.append((hardware, final_conf_layer)) - print("\n") - return final_time, final_energy, (curr_conf[0], final_conf) - - conf_index = 0 - print("RESULTS") - for line in config_file: - if line.startswith("conf"): - orig_line_lst = line.split(' ') - conf_name = orig_line_lst[0] - - if not fp32_baseline_conf: - fp32_baseline_conf = self.__conf_results[conf_index] #conf_name] - print("BASELINE") - print(fp32_baseline_conf) - baseline_total_time, baseline_total_energy = get_baseline_times_energies(fp32_baseline_conf) - results_file.write("%s\n" % repr(baseline_total_time)) - write_conf_to_file(conf_name, fp32_baseline_conf, 1, 1) - else: - curr_conf = self.__conf_results[conf_index] #conf_name] - #final_time, final_energy, = get_baseline_times_energies(curr_conf) - final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf, conf_name) - write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy) - conf_index += 1 - results_file.close() - config_file.close() - -if __name__ == "__main__": - if len(sys.argv) 
!= 5: - print("Usage: python driver.py <layer info or NONE> <tensor info> <configurations> <results file>") - exit(1) - Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py b/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py deleted file mode 100644 index 3095d15aba0757aca3b74705ba57b5e189b5cecb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/legacy/table_fixer.py +++ /dev/null @@ -1,72 +0,0 @@ -# Fixes table format -# Remove all instances of cifar10 --> each col should start with fp16 or fp32 -# Combine multiple tables - -def fix_columns(table_name, new_filename): - table_file = open(table_name, "r") - - new_table_file = [] - - for line in table_file: - line = line.strip() - if line.startswith("**"): - col_names = line.split() - new_col_names = [] - - for col_name in col_names: - if col_name.find("fp16") != -1: - new_col_names.append(col_name[col_name.find("fp16") : ]) - elif col_name.find("fp32") != -1: - new_col_names.append(col_name[col_name.find("fp32") : ]) - else: - new_col_names.append(col_name) - new_table_file.append(' '.join(new_col_names)) - else: - new_table_file.append(line) - table_file.close() - table_file_new = open(new_filename, "w") - table_file_new.write('\n'.join(new_table_file)) - table_file_new.close() - -def combine_tables(table1, table2, new_filename): - table1_file = open(table1, "r") - table2_file = open(table2, "r") - - table1_data = table1_file.read().strip().split('\n') - table2_data = table2_file.read().strip().split('\n') - new_contents = [] - - table2_ind = 0 - for table1_line in table1_data: - table2_line = table2_data[table2_ind] - - if table1_line.startswith("**"): - assert table2_line.startswith("**") - table2_lst = table2_line.strip().split() - table2_cols = ' '.join(table2_lst[3 : ]) - new_contents.append(table1_line + ' ' + table2_cols) - else: - table2_lst = table2_line.strip().split() - table2_cols = ' '.join(table2_lst[1 : ]) - new_contents.append(table1_line + ' ' + table2_cols) - table2_ind += 1 - - table1_file.close() - table2_file.close() - - new_file = open(new_filename, "w") - new_file.write('\n'.join(new_contents)) - new_file.close() - -import sys - -if __name__ == "__main__": - num_args = len(sys.argv) - - if num_args != 4 and num_args != 5: - print("python table_fixer.py <fix> <filename> OR <combine> <table1> <table2> <new name>") - exit(1) - elif sys.argv[1] == "fix": - fix_columns(sys.argv[2], sys.argv[3]) - elif sys.argv[1] == "combine": - combine_tables(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc deleted file mode 100644 index c746e5de6116f701df7370f93969d40486e04e90..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cifar_keras.cc +++ /dev/null @@ -1,203 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* CIFAR-10 DNN ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int 
test_batch_size = 1000; - - uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); - - void* input = readTrainedWeights("../model_params/cifar_keras/input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/cifar_keras/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/cifar_keras/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/cifar_keras/conv2.bin", - float_type, 64, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/cifar_keras/conv2_bias.bin", - float_type, 1, 64, 1, 1); - - void* conv3_filter = readTrainedWeights("../model_params/cifar_keras/conv3.bin", - float_type, 128, 64, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/cifar_keras/conv3_bias.bin", - float_type, 1, 128, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/cifar_keras/conv4.bin", - float_type, 128, 128, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/cifar_keras/conv4_bias.bin", - float_type, 1, 128, 1, 1); - - - void* fc1_weights = readTrainedWeights("../model_params/cifar_keras/fc1.bin", - float_type, 1, 1, 2048, 1024); - void* fc1_bias = readTrainedWeights("../model_params/cifar_keras/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/cifar_keras/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/cifar_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* conv1_tanh = tensorTanh(conv1out); - - - // 2nd Layer - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* conv2_tanh = tensorTanh(conv2out); - - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); // NOTE: In place operation - - void* conv3_tanh = tensorTanh(conv3out); - - void* pool3out = tensorPooling(conv3_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 4th Layer - void* conv4out = tensorConvolution(pool3out, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); // NOTE: In place operation - - void* conv4_tanh = tensorTanh(conv4out); - - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - - printTensorDims(pool4out); - - - void* gemm1out = tensorGemmGPU(pool4out, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorTanh(gemm1biasout); - - void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc deleted file mode 100644 index b64d52678238825fe6e6368d1d15f7958c3759aa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/alexnet_cifar10_cpu.cc +++ /dev/null @@ -1,181 +0,0 @@ - - -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_cpu_runtime.h" -#include "../include/utils_cpu.h" -#include "../include/types.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - int total_runs = 100; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* CIFAR-10 DNN ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 100; - - //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); - uint8_t* labels = readLabels("./model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - void* input = 
readTrainedWeightsCPU("./model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeightsCPU("./model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeightsCPU("./model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolutionCPU(input, conv1_filter, 5, 5, 1, 1, - conv_mode, conv_precision); - - tensorAddCPU(conv1out, conv1_bias); // NOTE: In place operation - - void* conv1_tanh = tensorTanhCPU(conv1out); - - void* pool1out = tensorPoolingCPU(conv1_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 2nd Layer - void* conv2out = tensorConvolutionCPU(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAddCPU(conv2out, conv2_bias); // NOTE: In place operation - - void* conv2_tanh = tensorTanhCPU(conv2out); - - void* pool2out = tensorPoolingCPU(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 3rd Layer - void* conv3out = tensorConvolutionCPU(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAddCPU(conv3out, conv3_bias); // NOTE: In place operation - - void* conv3_tanh = tensorTanhCPU(conv3out); - - // 4th Layer - void* conv4out = tensorConvolutionCPU(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAddCPU(conv4out, conv4_bias); // NOTE: In place operation - - void* conv4_tanh = tensorTanhCPU(conv4out); - - // 5th Layer - void* conv5out = tensorConvolutionCPU(conv4_tanh, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAddCPU(conv5out, conv5_bias); // NOTE: In place operation - - void* conv5_tanh = tensorTanhCPU(conv5out); - - void* pool5out = tensorPoolingCPU(conv5_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmCPU(pool5out, fc1_weights); - - void* gemm1biasout = tensorAddCPU(gemm1out, fc1_bias); - - void* result = tensorSoftmaxCPU(gemm1biasout); - - computeAccuracy2(labels, test_batch_size, result); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc deleted file mode 100644 index df540551d71814bf4bf18d349bf08cb03151e1dc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/cpu/resnet18_cifar10_cpu.cc +++ /dev/null @@ -1,253 +0,0 @@ - -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_cpu_runtime.h" -#include "../include/types.h" -#include "../include/utils_cpu.h" -int main() { - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("./model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - // void* input = readTrainedWeightsCPU(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void *conv2d_1_w = - readTrainedWeightsCPU(conv2d_1_w_path.c_str(), 0, 16, 3, 3, 3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void *conv2d_1_b = - readTrainedWeightsCPU(conv2d_1_b_path.c_str(), 0, 1, 16, 1, 1); - 
std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void *conv2d_2_w = - readTrainedWeightsCPU(conv2d_2_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void *conv2d_2_b = - readTrainedWeightsCPU(conv2d_2_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void *conv2d_3_w = - readTrainedWeightsCPU(conv2d_3_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void *conv2d_3_b = - readTrainedWeightsCPU(conv2d_3_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void *conv2d_4_w = - readTrainedWeightsCPU(conv2d_4_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void *conv2d_4_b = - readTrainedWeightsCPU(conv2d_4_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void *conv2d_5_w = - readTrainedWeightsCPU(conv2d_5_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void *conv2d_5_b = - readTrainedWeightsCPU(conv2d_5_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void *conv2d_6_w = - readTrainedWeightsCPU(conv2d_6_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void *conv2d_6_b = - readTrainedWeightsCPU(conv2d_6_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void *conv2d_7_w = - readTrainedWeightsCPU(conv2d_7_w_path.c_str(), 0, 16, 16, 3, 3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void *conv2d_7_b = - readTrainedWeightsCPU(conv2d_7_b_path.c_str(), 0, 1, 16, 1, 1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void *conv2d_8_w = - readTrainedWeightsCPU(conv2d_8_w_path.c_str(), 0, 32, 16, 3, 3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void *conv2d_8_b = - readTrainedWeightsCPU(conv2d_8_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void *conv2d_10_w = - readTrainedWeightsCPU(conv2d_10_w_path.c_str(), 0, 32, 16, 1, 1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void *conv2d_10_b = - readTrainedWeightsCPU(conv2d_10_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void *conv2d_9_w = - readTrainedWeightsCPU(conv2d_9_w_path.c_str(), 0, 32, 32, 3, 3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void *conv2d_9_b = - readTrainedWeightsCPU(conv2d_9_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void *conv2d_11_w = - readTrainedWeightsCPU(conv2d_11_w_path.c_str(), 0, 32, 32, 3, 3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void *conv2d_11_b = - readTrainedWeightsCPU(conv2d_11_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void *conv2d_12_w = - readTrainedWeightsCPU(conv2d_12_w_path.c_str(), 0, 32, 32, 3, 3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void *conv2d_12_b = - 
readTrainedWeightsCPU(conv2d_12_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void *conv2d_13_w = - readTrainedWeightsCPU(conv2d_13_w_path.c_str(), 0, 32, 32, 3, 3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void *conv2d_13_b = - readTrainedWeightsCPU(conv2d_13_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void *conv2d_14_w = - readTrainedWeightsCPU(conv2d_14_w_path.c_str(), 0, 32, 32, 3, 3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void *conv2d_14_b = - readTrainedWeightsCPU(conv2d_14_b_path.c_str(), 0, 1, 32, 1, 1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void *conv2d_15_w = - readTrainedWeightsCPU(conv2d_15_w_path.c_str(), 0, 64, 32, 3, 3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void *conv2d_15_b = - readTrainedWeightsCPU(conv2d_15_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void *conv2d_17_w = - readTrainedWeightsCPU(conv2d_17_w_path.c_str(), 0, 64, 32, 1, 1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void *conv2d_17_b = - readTrainedWeightsCPU(conv2d_17_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void *conv2d_16_w = - readTrainedWeightsCPU(conv2d_16_w_path.c_str(), 0, 64, 64, 3, 3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void *conv2d_16_b = - readTrainedWeightsCPU(conv2d_16_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void *conv2d_18_w = - readTrainedWeightsCPU(conv2d_18_w_path.c_str(), 0, 64, 64, 3, 3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void *conv2d_18_b = - readTrainedWeightsCPU(conv2d_18_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void *conv2d_19_w = - readTrainedWeightsCPU(conv2d_19_w_path.c_str(), 0, 64, 64, 3, 3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void *conv2d_19_b = - readTrainedWeightsCPU(conv2d_19_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void *conv2d_20_w = - readTrainedWeightsCPU(conv2d_20_w_path.c_str(), 0, 64, 64, 3, 3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void *conv2d_20_b = - readTrainedWeightsCPU(conv2d_20_b_path.c_str(), 0, 1, 64, 1, 1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void *conv2d_21_w = - readTrainedWeightsCPU(conv2d_21_w_path.c_str(), 0, 64, 64, 3, 3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void *conv2d_21_b = - readTrainedWeightsCPU(conv2d_21_b_path.c_str(), 0, 1, 64, 1, 1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void *dense_1_w = - readTrainedWeightsCPU(dense_1_w_path.c_str(), 0, 1, 1, 64, 10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void *dense_1_b = - readTrainedWeightsCPU(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - - int test_input_size = 10000; - int batch_size = 100; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for (int i = 
0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readTrainedWeightsCPU(input_path.c_str(), 0,batch_size,3,32,32); - uint8_t *labels = readLabels(labels_path.c_str(), batch_size); - - void* var_2 = tensorConvolutionCPU(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_3 = tensorAddCPU(var_2, conv2d_1_b); - void* var_4 = tensorReluCPU(var_3); - void* var_6 = tensorConvolutionCPU(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_7 = tensorAddCPU(var_6, conv2d_2_b); - void* var_8 = tensorReluCPU(var_7); - void* var_10 = tensorConvolutionCPU(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAddCPU(var_10, conv2d_3_b); - void* var_12 = tensorAddCPU(var_4, var_11); - void *var_13 = tensorReluCPU(var_12); - void* var_15 = tensorConvolutionCPU(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_16 = tensorAddCPU(var_15, conv2d_4_b); - void* var_17 = tensorReluCPU(var_16); - void* var_19 = tensorConvolutionCPU(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_20 = tensorAddCPU(var_19, conv2d_5_b); - void* var_21 = tensorAddCPU(var_13, var_20); - void* var_22 = tensorReluCPU(var_21); - void* var_24 = tensorConvolutionCPU(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAddCPU(var_24, conv2d_6_b); - void *var_26 = tensorReluCPU(var_25); - void* var_28 = tensorConvolutionCPU(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAddCPU(var_28, conv2d_7_b); - void *var_30 = tensorAddCPU(var_22, var_29); - void *var_31 = tensorReluCPU(var_30); - void* var_33 = tensorConvolutionCPU(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); - void *var_34 = tensorAddCPU(var_33, conv2d_8_b); - void *var_35 = tensorReluCPU(var_34); - void *var_37 = tensorConvolutionCPU(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_38 = tensorAddCPU(var_37, conv2d_9_b); - void* var_40 = tensorConvolutionCPU(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); - void *var_41 = tensorAddCPU(var_40, conv2d_10_b); - void* var_42 = tensorAddCPU(var_41, var_38); - void* var_43 = tensorReluCPU(var_42); - void* var_45 = tensorConvolutionCPU(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_46 = tensorAddCPU(var_45, conv2d_11_b); - void *var_47 = tensorReluCPU(var_46); - void* var_49 = tensorConvolutionCPU(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_50 = tensorAddCPU(var_49, conv2d_12_b); - void* var_51 = tensorAddCPU(var_43, var_50); - void* var_52 = tensorReluCPU(var_51); - void* var_54 = tensorConvolutionCPU(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_55 = tensorAddCPU(var_54, conv2d_13_b); - void* var_56 = tensorReluCPU(var_55); - void* var_58 = tensorConvolutionCPU(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); - void* var_59 = tensorAddCPU(var_58, conv2d_14_b); - void* var_60 = tensorAddCPU(var_52, var_59); - void* var_61 = tensorReluCPU(var_60); - void* var_63 = tensorConvolutionCPU(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); - void* var_64 = tensorAddCPU(var_63, conv2d_15_b); - void* var_65 = tensorReluCPU(var_64); - void* var_67 = tensorConvolutionCPU(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); - void* var_68 = tensorAddCPU(var_67, conv2d_16_b); - void* var_70 = tensorConvolutionCPU(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); - void* var_71 = tensorAddCPU(var_70, conv2d_17_b); - void* var_72 = tensorAddCPU(var_71, var_68); - void* var_73 = tensorReluCPU(var_72); - void* var_75 = tensorConvolutionCPU(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); - void* var_76 = tensorAddCPU(var_75, conv2d_18_b); - void* var_77 = tensorReluCPU(var_76); - void* var_79 = 
tensorConvolutionCPU(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); - void* var_80 = tensorAddCPU(var_79, conv2d_19_b); - void* var_81 = tensorAddCPU(var_73, var_80); - void* var_82 = tensorReluCPU(var_81); - void* var_84 = tensorConvolutionCPU(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); - void* var_85 = tensorAddCPU(var_84, conv2d_20_b); - void* var_86 = tensorReluCPU(var_85); - void* var_88 = tensorConvolutionCPU(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); - void* var_89 = tensorAddCPU(var_88, conv2d_21_b); - void* var_90 = tensorAddCPU(var_82, var_89); - void* var_91 = tensorReluCPU(var_90); - void* var_92 = tensorPoolingCPU(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmCPU(var_92, dense_1_w); - void* var_95 = tensorAddCPU(var_94, dense_1_b); - void* var_96 = tensorSoftmaxCPU(var_95); - - float accuracy = computeAccuracy2(labels,batch_size, var_96); - final_accuracy += accuracy; - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc deleted file mode 100644 index 00e259079058f1be5163bd43d9982e07b82f1001..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/depthwise.cc +++ /dev/null @@ -1,84 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/depthwise_test_8/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string depthwise_conv2d_1_b_path = dir_prefix + std::string("depthwise_conv2d_1_b.bin"); - void* depthwise_conv2d_1_b = readTrainedWeights(depthwise_conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,6272,1024); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 10000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); - - void* 
var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_4 = tensorConvolution(var_3, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorAdd(var_4, depthwise_conv2d_1_b); - void* var_6 = tensorRelu(var_5); - void* var_8 = tensorGemmGPU(var_6, dense_1_w); - void* var_9 = tensorAdd(var_8, dense_1_b); - void* var_10 = tensorRelu(var_9); - void* var_11 = tensorGemmGPU(var_10, dense_2_w); - void* var_12 = tensorAdd(var_11, dense_2_b); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorSoftmax(var_13); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_14); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp deleted file mode 100644 index 1596a157f4175b0462e0b762c643542cc05cf337..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/blend_pareto.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <algorithm> -#include <cassert> -#include <fstream> -#include <string> - -const size_t n_channels = 3; - -Tensor *gaussianFilter_(float div) { - std::vector<float> gauss_data = {1, 4, 6, 4, 1, 4, 16, 24, 16, - 4, 6, 24, 36, 24, 6, 4, 16, 24, - 16, 4, 1, 4, 6, 4, 1}; - for (float &f : gauss_data) - f /= div; - return (Tensor *)createFilterFromData( - CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1); -} - -Tensor *gaussianFilter() { return gaussianFilter_(16.0); } - -void *normalize(size_t &id, void *image) { - auto *max_1D = wrapper_tensorReduce( - std::to_string(id++).c_str(), image, 2, (int)MathOp::Max); - auto *max = wrapper_tensorReduce( - std::to_string(id++).c_str(), max_1D, 3, (int)MathOp::Max); - auto *img_norm = wrapper_tensorMap2( - std::to_string(id++).c_str(), (int)MathOp::Div, image, max); - freeTensor(max_1D); - freeTensor(max); - return img_norm; -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -void *sharpen(size_t &id, void *image) { - void *gaussian = gaussianFilter(); - forward_reshape(image); - void *blurred = wrapper_ConvLayer( - std::to_string(id++).c_str(), image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0); - backward_reshape(image); - backward_reshape(blurred); - void *blurred_norm = normalize(id, blurred); - void *image_norm = normalize(id, image); - void *ret = wrapper_tensorMap2( - std::to_string(id++).c_str(), (int)MathOp::AddWeighted, blurred_norm, - image_norm); - freeTensor(gaussian); - freeTensor(blurred); - freeTensor(blurred_norm); - freeTensor(image_norm); - return ret; -} - -void *main_procedure(void *fg, void *bg) { - size_t id = 1; - void *g_bg = sharpen(id, bg); - void *g_fg = 
sharpen(id, fg); - void *ret = wrapper_tensorMap2( - std::to_string(id++).c_str(), (int)MathOp::Blend2, g_bg, g_fg); - freeTensor(g_bg); - freeTensor(g_fg); - return ret; -} - -const size_t batch_size = 250, total_max = 5000; -const float psnr_threshold = 25.0; - -int main() { - const char *input1_path = "../model_params/image_processing_5k"; - const char *input2_path = "../model_params/image_processing_5k_shuffled"; - const char *ref_output_path = "../model_params/blending_ref_output"; - - llvm_hpvm_initTensorRt(0); - llvm_hpvm_initializeRuntimeController("tuner_confs.txt", ""); - startMemTracking(); - size_t bstart = 0; - while (true) { - auto *background = readDataSet(input1_path, bstart, batch_size, n_channels), - *foreground = readDataSet(input2_path, bstart, batch_size, n_channels); - if (!background || !foreground) - break; - - auto *result = main_procedure(foreground, background); - llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp deleted file mode 100644 index 7eb80568596fdc6d1c17e55a177e022f3c3a284a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/canny_pareto.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <vector> - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -std::pair<Tensor *, Tensor *> getSobelKernels() { - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - return std::make_pair(t1, t2); -} - -void *main_procedure(void *dataset) { - Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); - Tensor *kernel_x, *kernel_y; - std::tie(kernel_x, kernel_y) = getSobelKernels(); - - // 0. Grayscale - auto *summed_image = wrapper_tensorReduce("1", dataset, 1, (int)MathOp::Add); - auto *grayscale_image = - wrapper_tensorMap1("2", (int)MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = wrapper_ConvLayer( - "3", grayscale_image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0); - // 2. Get edge gradient / direction - auto *grad_x = wrapper_ConvLayer( - "4", image2, kernel_x, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0); - auto *grad_y = wrapper_ConvLayer( - "5", image2, kernel_y, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0); - auto *grad_mag = wrapper_tensorMap2("6", (int)MathOp::Hypot, grad_x, grad_y); - // 2.5. 
Normalize grad magnitude - auto *grad_max_1D = wrapper_tensorReduce("7", grad_mag, 2, (int)MathOp::Max); - auto *grad_max = wrapper_tensorReduce("8", grad_max_1D, 3, (int)MathOp::Max); - auto *grad_mag_norm = - wrapper_tensorMap2("9", (int)MathOp::Div, grad_mag, grad_max); - return grad_mag_norm; -} - -const size_t n_channels = 3; -const size_t batch_size = 250, total_max = 5000; -const float psnr_threshold = 25.0; - -int main(int argc, char *argv[]) { - const char *input_path = "../model_params/image_processing_5k"; - const char *ref_output_path = "../model_params/canny_ref_output"; - llvm_hpvm_initTensorRt(0); - llvm_hpvm_initializeRuntimeController("tuner_confs.txt", ""); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input_path, bstart, batch_size); - if (batch == nullptr) - break; - - auto *result = main_procedure(batch); - llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp deleted file mode 100644 index f925f98712b0016ba300c588803c75dd3a364ddc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/dynamic/fft_pareto.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <cmath> -#include <iostream> -#include <thrust/complex.h> -#include <vector> - -const size_t batch_size = 250, total_max = 5000; -const size_t half_size = 64, n_colors = N_RGB_CHAN; -const float psnr_threshold = 25.0; - -Tensor *sharpeningFilter(float sigma, size_t w, size_t h) { - static const float gaussian_rate = -0.5; - - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - float val = exp(exponent); - data[idx] = val; - sum += data[idx]; - } - for (size_t i = 0; i < w * h; i++) - data[i] *= gaussian_rate / sum; - size_t center_idx = m * h + n; - data[center_idx] += (1 - gaussian_rate); - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, 1); -} - -Tensor *outliningFilter() { - float data[9] = {-1, -1, -1, -1, 8, -1, -1, -1, -1}; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, 3, 3, 1); -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -void *main_procedure(void *dataset, void *outline, void *sharpen) { - forward_reshape(dataset); - auto *sharpened1 = wrapper_ConvLayer( - "1", dataset, sharpen, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0); - auto *sharpened2 = wrapper_ConvLayer( - "2", sharpened1, sharpen, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0); - auto *outlined = wrapper_ConvLayer( - "3", sharpened2, outline, nullptr, 1, 1, 1, 1, 0, 0, -1, 0.0, 0.0); - backward_reshape(outlined); - return outlined; -} - -int 
main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k_128_128"; - const char *ref_output_path = "../model_params/fft_ref_output"; - llvm_hpvm_initTensorRt(0); - llvm_hpvm_initializeRuntimeController("tuner_confs.txt", ""); - Tensor *outline = outliningFilter(), *sharpen = sharpeningFilter(1.5, 3, 3); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - auto *result = main_procedure(batch, outline, sharpen); - llvm_hpvm_invokeRtControl(result, nullptr, bstart, bstart + batch_size); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc deleted file mode 100644 index 44c03aab875a6de4af6c87776241295cd1fd673b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc2_half.cc +++ /dev/null @@ -1,137 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include "../../include/types.h" - - -bool Opentuner_run = false; - - -void test_2_Layer_clipped_FC(){ - - int total_runs = 1; - - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* 2-Layer FC with clipped activations and weights ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorHalfGemm(input, fc1_weights); - - void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); - - void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); - - void* result = tensorSoftmax(fc2_relu); - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - - if(Opentuner_run){ - - char* 
myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - -} - - -// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_2_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc deleted file mode 100644 index 697fea9b8aa61a8c3cf5ec3e8d0d66466df9b1e8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc3_half.cc +++ /dev/null @@ -1,151 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include "../../include/types.h" - - - - -bool Opentuner_run = false; - - -void test_3_Layer_clipped_FC(){ - - - int total_runs = 1000; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* 3-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 5000; - - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin", - float_type, 1, 1, 784, 256); - void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin", - float_type, 1, 256, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin", - float_type, 1, 1, 256, 128); - void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin", - float_type, 1, 128, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin", - float_type, 1, 1, 128, 10); - void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - // Start execution profiling Tensor ops - startProfiling(); - - - // Layer-1 - void* fc1out = tensorHalfGemm(input, fc1_weights); - - void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorHalfGemm(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); - - void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); - - // Layer-3 - void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights); - - void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias); - - void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2); - - void* 
result = tensorSoftmax(fc3_relu); - - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - -} - - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_3_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc deleted file mode 100644 index ad999165cfd4148479de58e24fed8291161da491..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/fc4_half.cc +++ /dev/null @@ -1,156 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include "../../include/types.h" - - -bool Opentuner_run = false; - - -void test_4_Layer_clipped_FC(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* 3-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin", - float_type, 1, 1, 784, 512); - void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin", - float_type, 1, 512, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin", - float_type, 1, 1, 512, 256); - void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin", - float_type, 1, 256, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin", - float_type, 1, 1, 256, 128); - void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin", - float_type, 1, 128, 1, 1); - void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin", - float_type, 1, 1, 128, 10); - void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorHalfGemm(input, fc1_weights); - - void* fc1_bias_out = tensorHalfAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorHalfRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorHalfGemm(fc1_relu, 
fc2_weights); - - void* fc2_bias_out = tensorHalfAdd(fc2out, fc2_bias); - - void* fc2_relu = tensorHalfRelu2(fc2_bias_out, 0, 2); - - // Layer-3 - void* fc3out = tensorHalfGemm(fc2_relu, fc3_weights); - - void* fc3_bias_out = tensorHalfAdd(fc3out, fc3_bias); - - void* fc3_relu = tensorHalfRelu2(fc3_bias_out, 0, 2); - - // Layer-4 - void* fc4out = tensorHalfGemm(fc3_relu, fc4_weights); - - void* fc4_bias_out = tensorHalfAdd(fc4out, fc4_bias); - - void* fc4_relu = tensorHalfRelu2(fc4_bias_out, 0, 2); - - void* result = tensorSoftmax(fc4_relu); - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } - - -} - - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_4_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc deleted file mode 100644 index bb45b14d62e061e704b252aa44e602e0c1d08ba7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/lenet_tanh_half.cc +++ /dev/null @@ -1,173 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include "../../include/types.h" - - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = 
readTrainedWeights("../model_params/lenet_tanh2/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorHalfConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorHalfAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* pool1out = tensorHalfPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - - void* conv1_tanh = tensorHalfTanh(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorHalfConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorHalfAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* pool2out = tensorHalfPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - - void* conv2_tanh = tensorHalfTanh(pool2out); - - void* gemm1out = tensorHalfGemm(conv2_tanh, fc1_weights); - - void* gemm1biasout = tensorHalfAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorHalfTanh(gemm1biasout); - - void* gemm2out = tensorHalfGemm(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorHalfAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorHalfTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc deleted file mode 100644 index 23fd15576ace419976a2b4d7f8191079a59c8c31..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEMO_half.cc +++ /dev/null @@ -1,164 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - 
startProfiling(); - - printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - //long int test_batch_size = 9145; - //long int test_batch_size = 4572; - long int test_batch_size = 2000; - - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255); - - void* motionblur_out = tensorHalfConvolution(emboss_bias_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * motionblur_out_clip = tensorHalfRelu2(motionblur_out, 0, 255); - - void* outline_out = tensorHalfConvolution(motionblur_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * result = tensorHalfRelu2(outline_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEMO_calib.bin"); - - - computePSNRViolation(result, golden_output, 30); - - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - stopProfiling(); - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc deleted file mode 100644 index 07875553d59a3635c21db8975db9e8986d1bc6c9..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEOM_half.cc +++ /dev/null @@ -1,101 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - - startProfiling(); - - printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n"); - - long int test_batch_size = 2000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = 
readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255); - - void* outline_out = tensorHalfConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * outline_out_clip = tensorHalfRelu2(outline_out, 0, 255); - - void* motionblur_out = tensorHalfConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * result = tensorHalfRelu2(motionblur_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEOM_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - } - - stopProfiling(); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc deleted file mode 100644 index 369d8ff4f76aaff4663532ee634a79d54b94b2aa..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GEO_half.cc +++ /dev/null @@ -1,93 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - - printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n"); - - startProfiling(); - - long int test_batch_size = 2000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - 
float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorHalfConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorHalfRelu2(emboss_bias_out, 0, 255); - - void* outline_out = tensorHalfConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * result = tensorHalfRelu2(outline_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEO_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - } - - stopProfiling(); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc deleted file mode 100644 index e2d2ff18090c085405ec94902696e1a6631d94a7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSME_half.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n"); - - startProfiling(); - - //long int test_batch_size = 4572; - long int test_batch_size = 2000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
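  // computePSNRViolation below compares each output image against the golden
  // output under a 30 dB threshold, presumably flagging images whose PSNR falls
  // below it. As a reference for the metric (assuming the usual definition,
  // with MAX = 255 because every stage is clipped to [0, 255] by
  // tensorHalfRelu2; uses std::log10 from <cmath>; sketch only):
  auto psnr = [](const float *out, const float *ref, size_t n) {
    double mse = 0.0;
    for (size_t i = 0; i < n; ++i) {
      double d = static_cast<double>(out[i]) - ref[i];
      mse += d * d;
    }
    mse /= n; // (identical images would need a guard for mse == 0)
    return 10.0 * std::log10(255.0 * 255.0 / mse); // higher = closer to golden
  };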
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255); - - void* sharpen_out = tensorHalfConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * sharpen_out_clip = tensorHalfRelu2(sharpen_out, 0, 255); - - void* motionblur_out = tensorHalfConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * motionblur_out_clip = tensorHalfRelu2(motionblur_out, 0, 255); - - void* emboss_out = tensorHalfConvolution(motionblur_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorHalfAdd(emboss_out, emboss_bias); - void* result = tensorHalfRelu2(emboss_bias_out, 0, 255); - - - //void* result = gaussian_out; - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - //hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GSME_calib.bin"); - - //computePSNRViolation(result, golden_output, 30); - } - - stopProfiling(); -} - - -int main(int argc, char* argv[]){ - - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc deleted file mode 100644 index 915dc596252576fb39dca073793d618d21634509..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp16/pipeline_GSM_half.cc +++ /dev/null @@ -1,91 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n"); - - // Start Profiling - startProfiling(); - - //long int test_batch_size = 9145; - long int test_batch_size = 2000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorHalfConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * gaussian_out_clip = tensorHalfRelu2(gaussian_out, 0, 255); - - void* sharpen_out = tensorHalfConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * sharpen_out_clip = tensorHalfRelu2(sharpen_out, 0, 255); - - void* motionblur_out = tensorHalfConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * result = tensorHalfRelu2(motionblur_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - //hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GSM_calib.bin"); - - //computePSNRViolation(result, golden_output, 30); - - } - - stopProfiling(); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc deleted file mode 100644 index 575f9b164f865afe268a4692ee6c4fd88b6a45c6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_clipped.cc +++ /dev/null @@ -1,132 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - -bool Opentuner_run = false; - -void test_2_Layer_clipped_FC(){ - - int total_runs = 10; - - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* 2-Layer FC with clipped activations and weights ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - - void* fc2_relu = 
tensorRelu2(fc2_bias_out, 0, 2); - - void* result = tensorSoftmax(fc2_relu); - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } - -} - - -// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_2_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc deleted file mode 100644 index 41343afa0484d022758ee690e69f38221c9ece10..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc2_cpu.cc +++ /dev/null @@ -1,66 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_cpu_runtime.h" -#include "../include/utils_cpu.h" -#include "../include/types.h" - - -void FC2(){ - - printf("********* 2-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 100; - - uint8_t* labels = readLabels("./model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeightsCPU("./model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - - void* fc1_weights = readTrainedWeightsCPU("./model_params/fc2_clipped/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeightsCPU("./model_params/fc2_clipped/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeightsCPU("./model_params/fc2_clipped/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeightsCPU("./model_params/fc2_clipped/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - // Layer-1 - void* fc1out = tensorGemmCPU(input, fc1_weights); - - void* fc1_bias_out = tensorAddCPU(fc1out, fc1_bias); - - void* fc1_relu = tensorRelu2CPU(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorAddCPU(fc2out, fc2_bias); - - void* fc2_relu = tensorRelu2CPU(fc2_bias_out, 0, 2); - - void* result = tensorSoftmaxCPU(fc2_relu); - - computeAccuracy2(labels, test_batch_size, result); - -} - - -// If an argument is passed - the run goes into OpenTuner mode - waiting on a pipe -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - FC2(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc deleted file mode 100644 index f566fd98a717698966c258377c32eda9ee30739d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc3_clipped.cc +++ /dev/null @@ -1,151 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include 
"../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - - - -bool Opentuner_run = false; - - -void test_3_Layer_clipped_FC(){ - - - int total_runs = 10000; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* 3-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 5000; - - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc3_clipped/fc1.bin", - float_type, 1, 1, 784, 256); - void* fc1_bias = readTrainedWeights("../model_params/fc3_clipped/fc1_bias.bin", - float_type, 1, 256, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc3_clipped/fc2.bin", - float_type, 1, 1, 256, 128); - void* fc2_bias = readTrainedWeights("../model_params/fc3_clipped/fc2_bias.bin", - float_type, 1, 128, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/fc3_clipped/fc3.bin", - float_type, 1, 1, 128, 10); - void* fc3_bias = readTrainedWeights("../model_params/fc3_clipped/fc3_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - // Start execution profiling Tensor ops - startProfiling(); - - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - - void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); - - // Layer-3 - void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); - - void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); - - void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2); - - void* result = tensorSoftmax(fc3_relu); - - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - -} - - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_3_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc deleted file mode 100644 index 24a4d888124c43e98c7b78a33c3b5eb29250808d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_clipped.cc +++ /dev/null @@ -1,156 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include 
<fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - -bool Opentuner_run = false; - - -void test_4_Layer_clipped_FC(){ - - int total_runs = 200; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* 3-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc4_clipped/fc1.bin", - float_type, 1, 1, 784, 512); - void* fc1_bias = readTrainedWeights("../model_params/fc4_clipped/fc1_bias.bin", - float_type, 1, 512, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc4_clipped/fc2.bin", - float_type, 1, 1, 512, 256); - void* fc2_bias = readTrainedWeights("../model_params/fc4_clipped/fc2_bias.bin", - float_type, 1, 256, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/fc4_clipped/fc3.bin", - float_type, 1, 1, 256, 128); - void* fc3_bias = readTrainedWeights("../model_params/fc4_clipped/fc3_bias.bin", - float_type, 1, 128, 1, 1); - void* fc4_weights = readTrainedWeights("../model_params/fc4_clipped/fc4.bin", - float_type, 1, 1, 128, 10); - void* fc4_bias = readTrainedWeights("../model_params/fc4_clipped/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - - void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - - void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); - - // Layer-3 - void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); - - void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); - - void* fc3_relu = tensorRelu2(fc3_bias_out, 0, 2); - - // Layer-4 - void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights); - - void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); - - void* fc4_relu = tensorRelu2(fc4_bias_out, 0, 2); - - void* result = tensorSoftmax(fc4_relu); - - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } - - -} - - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_4_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); 
- - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc deleted file mode 100644 index b7f6e1eb5256ebd0dbcf718d3e8e30f0d93ecbc5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/fc4_cpu.cc +++ /dev/null @@ -1,143 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_cpu_runtime.h" -#include "../include/utils_cpu.h" -#include "../include/types.h" - - -bool Opentuner_run = false; - - -void test_4_Layer_clipped_FC(){ - - int total_runs = 200; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* 3-Layer FC with clipped activations and weights ********* \n"); - - int test_batch_size = 500; - - uint8_t* labels = readLabels("./model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readTrainedWeightsCPU("./model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc1.bin", - float_type, 1, 1, 784, 512); - void* fc1_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc1_bias.bin", - float_type, 1, 512, 1, 1); - void* fc2_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc2.bin", - float_type, 1, 1, 512, 256); - void* fc2_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc2_bias.bin", - float_type, 1, 256, 1, 1); - void* fc3_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc3.bin", - float_type, 1, 1, 256, 128); - void* fc3_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc3_bias.bin", - float_type, 1, 128, 1, 1); - void* fc4_weights = readTrainedWeightsCPU("./model_params/fc4_clipped/fc4.bin", - float_type, 1, 1, 128, 10); - void* fc4_bias = readTrainedWeightsCPU("./model_params/fc4_clipped/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - - // Layer-1 - void* fc1out = tensorGemmCPU(input, fc1_weights); - - void* fc1_bias_out = tensorAddCPU(fc1out, fc1_bias); - - void* fc1_relu = tensorRelu2CPU(fc1_bias_out, 0, 2); - - // Layer-2 - void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights); - - void* fc2_bias_out = tensorAddCPU(fc2out, fc2_bias); - - void* fc2_relu = tensorRelu2CPU(fc2_bias_out, 0, 2); - - // Layer-3 - void* fc3out = tensorGemmCPU(fc2_relu, fc3_weights); - - void* fc3_bias_out = tensorAddCPU(fc3out, fc3_bias); - - void* fc3_relu = tensorRelu2CPU(fc3_bias_out, 0, 2); - - // Layer-4 - void* fc4out = tensorGemmCPU(fc3_relu, fc4_weights); - - void* fc4_bias_out = tensorAddCPU(fc4out, fc4_bias); - - void* fc4_relu = tensorRelu2CPU(fc4_bias_out, 0, 2); - - void* result = tensorSoftmaxCPU(fc4_relu); - - computeAccuracy2(labels, test_batch_size, result); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, 
str, 80); - close(fd_out); - } - } - - -} - - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_4_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc deleted file mode 100644 index 80a55dfd6b138ffa23b1ac9a7148025a491948ed..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10.cc +++ /dev/null @@ -1,413 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix 
+ std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = 
readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + 
std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = 
readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = 
readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - 
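For context on the listing above and below: every weight tensor in this legacy source is loaded with the same two-step pattern, building the file path from dir_prefix and then calling readTrainedWeights with the tensor's four dimensions (the second argument is always 0 in these files). Below is a minimal sketch of a helper that captures that pattern; the helper name loadWeights is illustrative and not part of the original file, while the readTrainedWeights signature is taken from the surrounding calls.

#include <string>
#include "../../tensor_runtime/include/tensor_runtime.h"

// Illustrative helper (not in the deleted file): one call per weight tensor
// instead of the repeated path-building + readTrainedWeights pairs above.
static void* loadWeights(const std::string& dir_prefix, const std::string& name,
                         int d0, int d1, int d2, int d3) {
  std::string path = dir_prefix + name;
  return readTrainedWeights(path.c_str(), 0, d0, d1, d2, d3);
}

// Equivalent usage, matching the expanded statements in the listing:
//   void* conv2d_1_w = loadWeights(dir_prefix, "conv2d_1_w.bin", 32, 3, 3, 3);
//   void* batch_normalization_1_gamma =
//       loadWeights(dir_prefix, "batch_normalization_1_gamma.bin", 1, 32, 1, 1);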
std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string 
batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + 
std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, 
batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_55 = tensorConvolution(var_53, 
depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorRelu(var_56); - void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorRelu(var_59); - void* var_63 = tensorConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_70 = tensorConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_75 = tensorRelu(var_74); - void* var_77 = tensorConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorRelu(var_78); - void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorRelu(var_81); - void* var_85 = tensorConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorRelu(var_86); - void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_92 = tensorConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorRelu(var_93); - void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorRelu(var_96); - void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorGemmGPU(var_99, dense_1_w); - void* var_102 = tensorAdd(var_101, 
dense_1_b); - void* var_103 = tensorSoftmax(var_102); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_103); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc deleted file mode 100644 index 89aa451dc1a5ee7b532bd5375e2e71e520c1372e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_cifar10_shallow.cc +++ /dev/null @@ -1,242 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(int argc, char* argv[]){ - - int total_runs = 1; - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(0); - - //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/"); - - std::string dir_prefix = std::string("../model_params/mobilenet_cifar10_shallow/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* 
batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + 
std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = 
readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = 
readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 1000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - - - for(int j = 0; j < total_runs; j++){ - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - 
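The forward pass written out here (and in the full mobilenet_cifar10.cc above) is a chain of identical depthwise-separable blocks: a grouped tensorConvolution whose last argument is the group count (equal to the channel count for the depthwise 3x3 step, 1 for the pointwise 1x1 step), followed by tensorBatchNorm with epsilon 0.001 and tensorRelu. A sketch of one such block follows, using a hypothetical BnParams struct and helper name; the runtime calls, argument order, and epsilon are copied from the listing, and the same tensor_runtime.h include as the deleted file is assumed.

// Illustrative only: groups the runtime calls that this legacy file writes out
// long-hand for every MobileNet block.
struct BnParams { void* gamma; void* beta; void* mean; void* variance; };

static void* depthwiseSeparableBlock(void* input, void* dw_filter, const BnParams& bn1,
                                     void* pw_filter, const BnParams& bn2,
                                     int stride, int channels) {
  // Depthwise 3x3 convolution: padding 1, given stride, group count = channel count.
  void* t = tensorConvolution(input, dw_filter, 1, 1, stride, stride, 1, channels);
  t = tensorBatchNorm(t, bn1.gamma, bn1.beta, bn1.mean, bn1.variance, 0.001);
  t = tensorRelu(t);
  // Pointwise 1x1 convolution: no padding, stride 1, ungrouped.
  t = tensorConvolution(t, pw_filter, 0, 0, 1, 1, 1, 1);
  t = tensorBatchNorm(t, bn2.gamma, bn2.beta, bn2.mean, bn2.variance, 0.001);
  return tensorRelu(t);
}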
void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorGemmGPU(var_47, dense_1_w); - void* var_50 = tensorAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc deleted file mode 100644 index 8905a93edb825b36c9e301ad1e450428740b4cb1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/mobilenet_shallow.cc +++ /dev/null @@ -1,203 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - std::string input_path = 
dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + 
std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 
0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + 
std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = 
tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_40 = tensorPooling(var_38,1,2,2,0,0,2,2); - void* var_42 = tensorGemmGPU(var_40, dense_1_w); - void* var_43 = tensorAdd(var_42, dense_1_b); - void* var_44 = tensorSoftmax(var_43); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_44); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc deleted file mode 100644 index 6f180e67f12844d20d2782267323f8631b421431..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/fp32/resnet_imagenet.cc +++ /dev/null @@ -1,927 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "tensor_runtime.h" -#include "utils.h" - - - - -int main(){ - - llvm_hpvm_initTensorRt(1); - - - std::string dir_prefix = std::string("/shared/hsharif3/resnet50_imagenet/"); - std::string input_path = dir_prefix + std::string("test_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,7,7); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = 
readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,1,1); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - 
std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,64,1,1); - std::string 
batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,64,256,1,1); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = 
readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,256,64,1,1); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,128,256,1,1); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,128,1,1); - std::string 
batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,512,256,1,1); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_16_b_path 
= dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_19_w_path = dir_prefix + 
std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* 
batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_22_w_path = dir_prefix + std::string("conv2d_22_w.bin"); - void* conv2d_22_w = readTrainedWeights(conv2d_22_w_path.c_str(), 0,128,512,1,1); - std::string conv2d_22_b_path = dir_prefix + std::string("conv2d_22_b.bin"); - void* conv2d_22_b = readTrainedWeights(conv2d_22_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_23_w_path = dir_prefix + std::string("conv2d_23_w.bin"); - void* conv2d_23_w = readTrainedWeights(conv2d_23_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_23_b_path = dir_prefix + std::string("conv2d_23_b.bin"); - void* conv2d_23_b = readTrainedWeights(conv2d_23_b_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_24_w_path = dir_prefix + std::string("conv2d_24_w.bin"); - void* conv2d_24_w = readTrainedWeights(conv2d_24_w_path.c_str(), 0,512,128,1,1); - std::string conv2d_24_b_path = dir_prefix + std::string("conv2d_24_b.bin"); - void* conv2d_24_b = readTrainedWeights(conv2d_24_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = 
readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_25_w_path = dir_prefix + std::string("conv2d_25_w.bin"); - void* conv2d_25_w = readTrainedWeights(conv2d_25_w_path.c_str(), 0,256,512,1,1); - std::string conv2d_25_b_path = dir_prefix + std::string("conv2d_25_b.bin"); - void* conv2d_25_b = readTrainedWeights(conv2d_25_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_26_w_path = dir_prefix + std::string("conv2d_26_w.bin"); - void* conv2d_26_w = readTrainedWeights(conv2d_26_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_26_b_path = dir_prefix + std::string("conv2d_26_b.bin"); - void* conv2d_26_b = readTrainedWeights(conv2d_26_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_27_w_path = dir_prefix + std::string("conv2d_27_w.bin"); - void* conv2d_27_w = readTrainedWeights(conv2d_27_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_27_b_path = dir_prefix + std::string("conv2d_27_b.bin"); - void* conv2d_27_b = readTrainedWeights(conv2d_27_b_path.c_str(), 0,1,1024,1,1); - std::string conv2d_28_w_path = dir_prefix + std::string("conv2d_28_w.bin"); - void* conv2d_28_w = readTrainedWeights(conv2d_28_w_path.c_str(), 0,1024,512,1,1); - std::string conv2d_28_b_path = dir_prefix + std::string("conv2d_28_b.bin"); - void* conv2d_28_b = readTrainedWeights(conv2d_28_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* 
batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_gamma_path = dir_prefix + std::string("batch_normalization_28_gamma.bin"); - void* batch_normalization_28_gamma = readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_beta_path = dir_prefix + std::string("batch_normalization_28_beta.bin"); - void* batch_normalization_28_beta = readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_mean_path = dir_prefix + std::string("batch_normalization_28_mean.bin"); - void* batch_normalization_28_mean = readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_28_variance_path = dir_prefix + std::string("batch_normalization_28_variance.bin"); - void* batch_normalization_28_variance = readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_29_w_path = dir_prefix + std::string("conv2d_29_w.bin"); - void* conv2d_29_w = readTrainedWeights(conv2d_29_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_29_b_path = dir_prefix + std::string("conv2d_29_b.bin"); - void* conv2d_29_b = readTrainedWeights(conv2d_29_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_gamma_path = dir_prefix + std::string("batch_normalization_29_gamma.bin"); - void* batch_normalization_29_gamma = readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_beta_path = dir_prefix + std::string("batch_normalization_29_beta.bin"); - void* batch_normalization_29_beta = readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_mean_path = dir_prefix + std::string("batch_normalization_29_mean.bin"); - void* batch_normalization_29_mean = readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_29_variance_path = dir_prefix + std::string("batch_normalization_29_variance.bin"); - void* batch_normalization_29_variance = readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_30_w_path = dir_prefix + std::string("conv2d_30_w.bin"); - void* conv2d_30_w = readTrainedWeights(conv2d_30_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_30_b_path = dir_prefix + std::string("conv2d_30_b.bin"); - void* conv2d_30_b = readTrainedWeights(conv2d_30_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_gamma_path = dir_prefix + std::string("batch_normalization_30_gamma.bin"); - void* batch_normalization_30_gamma = readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_beta_path = dir_prefix + 
std::string("batch_normalization_30_beta.bin"); - void* batch_normalization_30_beta = readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_mean_path = dir_prefix + std::string("batch_normalization_30_mean.bin"); - void* batch_normalization_30_mean = readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_30_variance_path = dir_prefix + std::string("batch_normalization_30_variance.bin"); - void* batch_normalization_30_variance = readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_31_w_path = dir_prefix + std::string("conv2d_31_w.bin"); - void* conv2d_31_w = readTrainedWeights(conv2d_31_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_31_b_path = dir_prefix + std::string("conv2d_31_b.bin"); - void* conv2d_31_b = readTrainedWeights(conv2d_31_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_gamma_path = dir_prefix + std::string("batch_normalization_31_gamma.bin"); - void* batch_normalization_31_gamma = readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_beta_path = dir_prefix + std::string("batch_normalization_31_beta.bin"); - void* batch_normalization_31_beta = readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_mean_path = dir_prefix + std::string("batch_normalization_31_mean.bin"); - void* batch_normalization_31_mean = readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_31_variance_path = dir_prefix + std::string("batch_normalization_31_variance.bin"); - void* batch_normalization_31_variance = readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_32_w_path = dir_prefix + std::string("conv2d_32_w.bin"); - void* conv2d_32_w = readTrainedWeights(conv2d_32_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_32_b_path = dir_prefix + std::string("conv2d_32_b.bin"); - void* conv2d_32_b = readTrainedWeights(conv2d_32_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_gamma_path = dir_prefix + std::string("batch_normalization_32_gamma.bin"); - void* batch_normalization_32_gamma = readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_beta_path = dir_prefix + std::string("batch_normalization_32_beta.bin"); - void* batch_normalization_32_beta = readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_mean_path = dir_prefix + std::string("batch_normalization_32_mean.bin"); - void* batch_normalization_32_mean = readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_32_variance_path = dir_prefix + std::string("batch_normalization_32_variance.bin"); - void* batch_normalization_32_variance = readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_33_w_path = dir_prefix + std::string("conv2d_33_w.bin"); - void* conv2d_33_w = readTrainedWeights(conv2d_33_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_33_b_path = dir_prefix + std::string("conv2d_33_b.bin"); - void* conv2d_33_b = readTrainedWeights(conv2d_33_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_gamma_path = dir_prefix + std::string("batch_normalization_33_gamma.bin"); - void* 
batch_normalization_33_gamma = readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_beta_path = dir_prefix + std::string("batch_normalization_33_beta.bin"); - void* batch_normalization_33_beta = readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_mean_path = dir_prefix + std::string("batch_normalization_33_mean.bin"); - void* batch_normalization_33_mean = readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_33_variance_path = dir_prefix + std::string("batch_normalization_33_variance.bin"); - void* batch_normalization_33_variance = readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_34_w_path = dir_prefix + std::string("conv2d_34_w.bin"); - void* conv2d_34_w = readTrainedWeights(conv2d_34_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_34_b_path = dir_prefix + std::string("conv2d_34_b.bin"); - void* conv2d_34_b = readTrainedWeights(conv2d_34_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_gamma_path = dir_prefix + std::string("batch_normalization_34_gamma.bin"); - void* batch_normalization_34_gamma = readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_beta_path = dir_prefix + std::string("batch_normalization_34_beta.bin"); - void* batch_normalization_34_beta = readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_mean_path = dir_prefix + std::string("batch_normalization_34_mean.bin"); - void* batch_normalization_34_mean = readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_34_variance_path = dir_prefix + std::string("batch_normalization_34_variance.bin"); - void* batch_normalization_34_variance = readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_35_w_path = dir_prefix + std::string("conv2d_35_w.bin"); - void* conv2d_35_w = readTrainedWeights(conv2d_35_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_35_b_path = dir_prefix + std::string("conv2d_35_b.bin"); - void* conv2d_35_b = readTrainedWeights(conv2d_35_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_gamma_path = dir_prefix + std::string("batch_normalization_35_gamma.bin"); - void* batch_normalization_35_gamma = readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_beta_path = dir_prefix + std::string("batch_normalization_35_beta.bin"); - void* batch_normalization_35_beta = readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_mean_path = dir_prefix + std::string("batch_normalization_35_mean.bin"); - void* batch_normalization_35_mean = readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_35_variance_path = dir_prefix + std::string("batch_normalization_35_variance.bin"); - void* batch_normalization_35_variance = readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_36_w_path = dir_prefix + std::string("conv2d_36_w.bin"); - void* conv2d_36_w = readTrainedWeights(conv2d_36_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_36_b_path = dir_prefix + std::string("conv2d_36_b.bin"); - void* conv2d_36_b = 
readTrainedWeights(conv2d_36_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_gamma_path = dir_prefix + std::string("batch_normalization_36_gamma.bin"); - void* batch_normalization_36_gamma = readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_beta_path = dir_prefix + std::string("batch_normalization_36_beta.bin"); - void* batch_normalization_36_beta = readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_mean_path = dir_prefix + std::string("batch_normalization_36_mean.bin"); - void* batch_normalization_36_mean = readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_36_variance_path = dir_prefix + std::string("batch_normalization_36_variance.bin"); - void* batch_normalization_36_variance = readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_37_w_path = dir_prefix + std::string("conv2d_37_w.bin"); - void* conv2d_37_w = readTrainedWeights(conv2d_37_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_37_b_path = dir_prefix + std::string("conv2d_37_b.bin"); - void* conv2d_37_b = readTrainedWeights(conv2d_37_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_gamma_path = dir_prefix + std::string("batch_normalization_37_gamma.bin"); - void* batch_normalization_37_gamma = readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_beta_path = dir_prefix + std::string("batch_normalization_37_beta.bin"); - void* batch_normalization_37_beta = readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_mean_path = dir_prefix + std::string("batch_normalization_37_mean.bin"); - void* batch_normalization_37_mean = readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_37_variance_path = dir_prefix + std::string("batch_normalization_37_variance.bin"); - void* batch_normalization_37_variance = readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_38_w_path = dir_prefix + std::string("conv2d_38_w.bin"); - void* conv2d_38_w = readTrainedWeights(conv2d_38_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_38_b_path = dir_prefix + std::string("conv2d_38_b.bin"); - void* conv2d_38_b = readTrainedWeights(conv2d_38_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_gamma_path = dir_prefix + std::string("batch_normalization_38_gamma.bin"); - void* batch_normalization_38_gamma = readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_beta_path = dir_prefix + std::string("batch_normalization_38_beta.bin"); - void* batch_normalization_38_beta = readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_mean_path = dir_prefix + std::string("batch_normalization_38_mean.bin"); - void* batch_normalization_38_mean = readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_38_variance_path = dir_prefix + std::string("batch_normalization_38_variance.bin"); - void* batch_normalization_38_variance = readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_39_w_path = dir_prefix + std::string("conv2d_39_w.bin"); - void* conv2d_39_w = 
readTrainedWeights(conv2d_39_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_39_b_path = dir_prefix + std::string("conv2d_39_b.bin"); - void* conv2d_39_b = readTrainedWeights(conv2d_39_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_gamma_path = dir_prefix + std::string("batch_normalization_39_gamma.bin"); - void* batch_normalization_39_gamma = readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_beta_path = dir_prefix + std::string("batch_normalization_39_beta.bin"); - void* batch_normalization_39_beta = readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_mean_path = dir_prefix + std::string("batch_normalization_39_mean.bin"); - void* batch_normalization_39_mean = readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_39_variance_path = dir_prefix + std::string("batch_normalization_39_variance.bin"); - void* batch_normalization_39_variance = readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_40_w_path = dir_prefix + std::string("conv2d_40_w.bin"); - void* conv2d_40_w = readTrainedWeights(conv2d_40_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_40_b_path = dir_prefix + std::string("conv2d_40_b.bin"); - void* conv2d_40_b = readTrainedWeights(conv2d_40_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_gamma_path = dir_prefix + std::string("batch_normalization_40_gamma.bin"); - void* batch_normalization_40_gamma = readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_beta_path = dir_prefix + std::string("batch_normalization_40_beta.bin"); - void* batch_normalization_40_beta = readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_mean_path = dir_prefix + std::string("batch_normalization_40_mean.bin"); - void* batch_normalization_40_mean = readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_40_variance_path = dir_prefix + std::string("batch_normalization_40_variance.bin"); - void* batch_normalization_40_variance = readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_41_w_path = dir_prefix + std::string("conv2d_41_w.bin"); - void* conv2d_41_w = readTrainedWeights(conv2d_41_w_path.c_str(), 0,256,1024,1,1); - std::string conv2d_41_b_path = dir_prefix + std::string("conv2d_41_b.bin"); - void* conv2d_41_b = readTrainedWeights(conv2d_41_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_gamma_path = dir_prefix + std::string("batch_normalization_41_gamma.bin"); - void* batch_normalization_41_gamma = readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_beta_path = dir_prefix + std::string("batch_normalization_41_beta.bin"); - void* batch_normalization_41_beta = readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_mean_path = dir_prefix + std::string("batch_normalization_41_mean.bin"); - void* batch_normalization_41_mean = readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_41_variance_path = dir_prefix + std::string("batch_normalization_41_variance.bin"); - void* batch_normalization_41_variance = 
readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_42_w_path = dir_prefix + std::string("conv2d_42_w.bin"); - void* conv2d_42_w = readTrainedWeights(conv2d_42_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_42_b_path = dir_prefix + std::string("conv2d_42_b.bin"); - void* conv2d_42_b = readTrainedWeights(conv2d_42_b_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_gamma_path = dir_prefix + std::string("batch_normalization_42_gamma.bin"); - void* batch_normalization_42_gamma = readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_beta_path = dir_prefix + std::string("batch_normalization_42_beta.bin"); - void* batch_normalization_42_beta = readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_mean_path = dir_prefix + std::string("batch_normalization_42_mean.bin"); - void* batch_normalization_42_mean = readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_42_variance_path = dir_prefix + std::string("batch_normalization_42_variance.bin"); - void* batch_normalization_42_variance = readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_43_w_path = dir_prefix + std::string("conv2d_43_w.bin"); - void* conv2d_43_w = readTrainedWeights(conv2d_43_w_path.c_str(), 0,1024,256,1,1); - std::string conv2d_43_b_path = dir_prefix + std::string("conv2d_43_b.bin"); - void* conv2d_43_b = readTrainedWeights(conv2d_43_b_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_gamma_path = dir_prefix + std::string("batch_normalization_43_gamma.bin"); - void* batch_normalization_43_gamma = readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_beta_path = dir_prefix + std::string("batch_normalization_43_beta.bin"); - void* batch_normalization_43_beta = readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_mean_path = dir_prefix + std::string("batch_normalization_43_mean.bin"); - void* batch_normalization_43_mean = readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_43_variance_path = dir_prefix + std::string("batch_normalization_43_variance.bin"); - void* batch_normalization_43_variance = readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_44_w_path = dir_prefix + std::string("conv2d_44_w.bin"); - void* conv2d_44_w = readTrainedWeights(conv2d_44_w_path.c_str(), 0,512,1024,1,1); - std::string conv2d_44_b_path = dir_prefix + std::string("conv2d_44_b.bin"); - void* conv2d_44_b = readTrainedWeights(conv2d_44_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_gamma_path = dir_prefix + std::string("batch_normalization_44_gamma.bin"); - void* batch_normalization_44_gamma = readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_beta_path = dir_prefix + std::string("batch_normalization_44_beta.bin"); - void* batch_normalization_44_beta = readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_44_mean_path = dir_prefix + std::string("batch_normalization_44_mean.bin"); - void* batch_normalization_44_mean = readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,512,1,1); - 
std::string batch_normalization_44_variance_path = dir_prefix + std::string("batch_normalization_44_variance.bin"); - void* batch_normalization_44_variance = readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_45_w_path = dir_prefix + std::string("conv2d_45_w.bin"); - void* conv2d_45_w = readTrainedWeights(conv2d_45_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_45_b_path = dir_prefix + std::string("conv2d_45_b.bin"); - void* conv2d_45_b = readTrainedWeights(conv2d_45_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_gamma_path = dir_prefix + std::string("batch_normalization_45_gamma.bin"); - void* batch_normalization_45_gamma = readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_beta_path = dir_prefix + std::string("batch_normalization_45_beta.bin"); - void* batch_normalization_45_beta = readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_mean_path = dir_prefix + std::string("batch_normalization_45_mean.bin"); - void* batch_normalization_45_mean = readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_45_variance_path = dir_prefix + std::string("batch_normalization_45_variance.bin"); - void* batch_normalization_45_variance = readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_46_w_path = dir_prefix + std::string("conv2d_46_w.bin"); - void* conv2d_46_w = readTrainedWeights(conv2d_46_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_46_b_path = dir_prefix + std::string("conv2d_46_b.bin"); - void* conv2d_46_b = readTrainedWeights(conv2d_46_b_path.c_str(), 0,1,2048,1,1); - std::string conv2d_47_w_path = dir_prefix + std::string("conv2d_47_w.bin"); - void* conv2d_47_w = readTrainedWeights(conv2d_47_w_path.c_str(), 0,2048,1024,1,1); - std::string conv2d_47_b_path = dir_prefix + std::string("conv2d_47_b.bin"); - void* conv2d_47_b = readTrainedWeights(conv2d_47_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_gamma_path = dir_prefix + std::string("batch_normalization_46_gamma.bin"); - void* batch_normalization_46_gamma = readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_beta_path = dir_prefix + std::string("batch_normalization_46_beta.bin"); - void* batch_normalization_46_beta = readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_mean_path = dir_prefix + std::string("batch_normalization_46_mean.bin"); - void* batch_normalization_46_mean = readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_46_variance_path = dir_prefix + std::string("batch_normalization_46_variance.bin"); - void* batch_normalization_46_variance = readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_gamma_path = dir_prefix + std::string("batch_normalization_47_gamma.bin"); - void* batch_normalization_47_gamma = readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_beta_path = dir_prefix + std::string("batch_normalization_47_beta.bin"); - void* batch_normalization_47_beta = readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_mean_path = dir_prefix + 
std::string("batch_normalization_47_mean.bin"); - void* batch_normalization_47_mean = readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_47_variance_path = dir_prefix + std::string("batch_normalization_47_variance.bin"); - void* batch_normalization_47_variance = readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_48_w_path = dir_prefix + std::string("conv2d_48_w.bin"); - void* conv2d_48_w = readTrainedWeights(conv2d_48_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_48_b_path = dir_prefix + std::string("conv2d_48_b.bin"); - void* conv2d_48_b = readTrainedWeights(conv2d_48_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_gamma_path = dir_prefix + std::string("batch_normalization_48_gamma.bin"); - void* batch_normalization_48_gamma = readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_beta_path = dir_prefix + std::string("batch_normalization_48_beta.bin"); - void* batch_normalization_48_beta = readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_mean_path = dir_prefix + std::string("batch_normalization_48_mean.bin"); - void* batch_normalization_48_mean = readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_48_variance_path = dir_prefix + std::string("batch_normalization_48_variance.bin"); - void* batch_normalization_48_variance = readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_49_w_path = dir_prefix + std::string("conv2d_49_w.bin"); - void* conv2d_49_w = readTrainedWeights(conv2d_49_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_49_b_path = dir_prefix + std::string("conv2d_49_b.bin"); - void* conv2d_49_b = readTrainedWeights(conv2d_49_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_gamma_path = dir_prefix + std::string("batch_normalization_49_gamma.bin"); - void* batch_normalization_49_gamma = readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_beta_path = dir_prefix + std::string("batch_normalization_49_beta.bin"); - void* batch_normalization_49_beta = readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_mean_path = dir_prefix + std::string("batch_normalization_49_mean.bin"); - void* batch_normalization_49_mean = readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_49_variance_path = dir_prefix + std::string("batch_normalization_49_variance.bin"); - void* batch_normalization_49_variance = readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_50_w_path = dir_prefix + std::string("conv2d_50_w.bin"); - void* conv2d_50_w = readTrainedWeights(conv2d_50_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_50_b_path = dir_prefix + std::string("conv2d_50_b.bin"); - void* conv2d_50_b = readTrainedWeights(conv2d_50_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_gamma_path = dir_prefix + std::string("batch_normalization_50_gamma.bin"); - void* batch_normalization_50_gamma = readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_beta_path = dir_prefix + std::string("batch_normalization_50_beta.bin"); - void* 
batch_normalization_50_beta = readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_mean_path = dir_prefix + std::string("batch_normalization_50_mean.bin"); - void* batch_normalization_50_mean = readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_50_variance_path = dir_prefix + std::string("batch_normalization_50_variance.bin"); - void* batch_normalization_50_variance = readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,2048,1,1); - std::string conv2d_51_w_path = dir_prefix + std::string("conv2d_51_w.bin"); - void* conv2d_51_w = readTrainedWeights(conv2d_51_w_path.c_str(), 0,512,2048,1,1); - std::string conv2d_51_b_path = dir_prefix + std::string("conv2d_51_b.bin"); - void* conv2d_51_b = readTrainedWeights(conv2d_51_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_gamma_path = dir_prefix + std::string("batch_normalization_51_gamma.bin"); - void* batch_normalization_51_gamma = readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_beta_path = dir_prefix + std::string("batch_normalization_51_beta.bin"); - void* batch_normalization_51_beta = readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_mean_path = dir_prefix + std::string("batch_normalization_51_mean.bin"); - void* batch_normalization_51_mean = readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_51_variance_path = dir_prefix + std::string("batch_normalization_51_variance.bin"); - void* batch_normalization_51_variance = readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_52_w_path = dir_prefix + std::string("conv2d_52_w.bin"); - void* conv2d_52_w = readTrainedWeights(conv2d_52_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_52_b_path = dir_prefix + std::string("conv2d_52_b.bin"); - void* conv2d_52_b = readTrainedWeights(conv2d_52_b_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_gamma_path = dir_prefix + std::string("batch_normalization_52_gamma.bin"); - void* batch_normalization_52_gamma = readTrainedWeights(batch_normalization_52_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_beta_path = dir_prefix + std::string("batch_normalization_52_beta.bin"); - void* batch_normalization_52_beta = readTrainedWeights(batch_normalization_52_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_mean_path = dir_prefix + std::string("batch_normalization_52_mean.bin"); - void* batch_normalization_52_mean = readTrainedWeights(batch_normalization_52_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_52_variance_path = dir_prefix + std::string("batch_normalization_52_variance.bin"); - void* batch_normalization_52_variance = readTrainedWeights(batch_normalization_52_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_53_w_path = dir_prefix + std::string("conv2d_53_w.bin"); - void* conv2d_53_w = readTrainedWeights(conv2d_53_w_path.c_str(), 0,2048,512,1,1); - std::string conv2d_53_b_path = dir_prefix + std::string("conv2d_53_b.bin"); - void* conv2d_53_b = readTrainedWeights(conv2d_53_b_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_gamma_path = dir_prefix + std::string("batch_normalization_53_gamma.bin"); - void* batch_normalization_53_gamma = 
readTrainedWeights(batch_normalization_53_gamma_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_beta_path = dir_prefix + std::string("batch_normalization_53_beta.bin"); - void* batch_normalization_53_beta = readTrainedWeights(batch_normalization_53_beta_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_mean_path = dir_prefix + std::string("batch_normalization_53_mean.bin"); - void* batch_normalization_53_mean = readTrainedWeights(batch_normalization_53_mean_path.c_str(), 0,1,2048,1,1); - std::string batch_normalization_53_variance_path = dir_prefix + std::string("batch_normalization_53_variance.bin"); - void* batch_normalization_53_variance = readTrainedWeights(batch_normalization_53_variance_path.c_str(), 0,1,2048,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,1000); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1000,1,1); - - - - startMemTracking(); - - int test_input_size = 2000; - int batch_size = 100; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,224,224); - - void* var_2 = tensorConvolution(input, conv2d_1_w, 3, 3, 2, 2, 1, 1); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - // NOTE: Issue is that this pooling has window 2*2 - void* var_5 = tensorPooling(var_4,0,3,3,0,0,2,2); - void* var_6 = tensorBatchNorm(var_5, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorAdd(var_7, conv2d_2_b); - void* var_9 = tensorBatchNorm(var_8, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_10 = tensorRelu(var_9); - void* var_11 = tensorConvolution(var_10, conv2d_3_w, 1, 1, 1, 1, 1, 1); - void* var_12 = tensorAdd(var_11, conv2d_3_b); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorBatchNorm(var_16, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_18 = tensorConvolution(var_6, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_19 = tensorAdd(var_18, conv2d_5_b); - void* var_20 = tensorBatchNorm(var_19, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_21 = tensorAdd(var_17, var_20); - void* var_22 = tensorRelu(var_21); - void* var_23 = tensorConvolution(var_22, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_24 = tensorAdd(var_23, conv2d_6_b); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 1); - void* 
var_28 = tensorAdd(var_27, conv2d_7_b); - void* var_29 = tensorBatchNorm(var_28, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_30 = tensorRelu(var_29); - void* var_31 = tensorConvolution(var_30, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_32 = tensorAdd(var_31, conv2d_8_b); - void* var_33 = tensorBatchNorm(var_32, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_34 = tensorAdd(var_33, var_22); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorAdd(var_36, conv2d_9_b); - void* var_38 = tensorBatchNorm(var_37, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_39 = tensorRelu(var_38); - void* var_40 = tensorConvolution(var_39, conv2d_10_w, 1, 1, 1, 1, 1, 1); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorAdd(var_44, conv2d_11_b); - void* var_46 = tensorBatchNorm(var_45, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_47 = tensorAdd(var_46, var_35); - void* var_48 = tensorRelu(var_47); - void* var_49 = tensorConvolution(var_48, conv2d_12_w, 0, 0, 2, 2, 1, 1); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorBatchNorm(var_50, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_52 = tensorRelu(var_51); - void* var_53 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 1); - void* var_54 = tensorAdd(var_53, conv2d_13_b); - void* var_55 = tensorBatchNorm(var_54, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_58 = tensorAdd(var_57, conv2d_14_b); - void* var_59 = tensorBatchNorm(var_58, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_60 = tensorConvolution(var_48, conv2d_15_w, 0, 0, 2, 2, 1, 1); - void* var_61 = tensorAdd(var_60, conv2d_15_b); - void* var_62 = tensorBatchNorm(var_61, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_63 = tensorAdd(var_59, var_62); - void* var_64 = tensorRelu(var_63); - void* var_65 = tensorConvolution(var_64, conv2d_16_w, 0, 0, 1, 1, 1, 1); - void* var_66 = tensorAdd(var_65, conv2d_16_b); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, conv2d_17_w, 1, 1, 1, 1, 1, 1); - void* var_70 = tensorAdd(var_69, conv2d_17_b); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_17_gamma, batch_normalization_17_beta, 
batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_18_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorAdd(var_73, conv2d_18_b); - void* var_75 = tensorBatchNorm(var_74, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_76 = tensorAdd(var_75, var_64); - void* var_77 = tensorRelu(var_76); - void* var_78 = tensorConvolution(var_77, conv2d_19_w, 0, 0, 1, 1, 1, 1); - void* var_79 = tensorAdd(var_78, conv2d_19_b); - void* var_80 = tensorBatchNorm(var_79, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_81 = tensorRelu(var_80); - void* var_82 = tensorConvolution(var_81, conv2d_20_w, 1, 1, 1, 1, 1, 1); - void* var_83 = tensorAdd(var_82, conv2d_20_b); - void* var_84 = tensorBatchNorm(var_83, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_85 = tensorRelu(var_84); - void* var_86 = tensorConvolution(var_85, conv2d_21_w, 0, 0, 1, 1, 1, 1); - void* var_87 = tensorAdd(var_86, conv2d_21_b); - void* var_88 = tensorBatchNorm(var_87, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_89 = tensorAdd(var_88, var_77); - void* var_90 = tensorRelu(var_89); - void* var_91 = tensorConvolution(var_90, conv2d_22_w, 0, 0, 1, 1, 1, 1); - void* var_92 = tensorAdd(var_91, conv2d_22_b); - void* var_93 = tensorBatchNorm(var_92, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_94 = tensorRelu(var_93); - void* var_95 = tensorConvolution(var_94, conv2d_23_w, 1, 1, 1, 1, 1, 1); - void* var_96 = tensorAdd(var_95, conv2d_23_b); - void* var_97 = tensorBatchNorm(var_96, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_98 = tensorRelu(var_97); - void* var_99 = tensorConvolution(var_98, conv2d_24_w, 0, 0, 1, 1, 1, 1); - void* var_100 = tensorAdd(var_99, conv2d_24_b); - void* var_101 = tensorBatchNorm(var_100, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_102 = tensorAdd(var_101, var_90); - void* var_103 = tensorRelu(var_102); - void* var_104 = tensorConvolution(var_103, conv2d_25_w, 0, 0, 2, 2, 1, 1); - void* var_105 = tensorAdd(var_104, conv2d_25_b); - void* var_106 = tensorBatchNorm(var_105, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_107 = tensorRelu(var_106); - void* var_108 = tensorConvolution(var_107, conv2d_26_w, 1, 1, 1, 1, 1, 1); - void* var_109 = tensorAdd(var_108, conv2d_26_b); - void* var_110 = tensorBatchNorm(var_109, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_111 = tensorRelu(var_110); - void* var_112 = tensorConvolution(var_111, conv2d_27_w, 0, 0, 1, 1, 1, 1); - void* var_113 = tensorAdd(var_112, conv2d_27_b); - void* var_114 = tensorBatchNorm(var_113, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* 
var_115 = tensorConvolution(var_103, conv2d_28_w, 0, 0, 2, 2, 1, 1); - void* var_116 = tensorAdd(var_115, conv2d_28_b); - void* var_117 = tensorBatchNorm(var_116, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); - void* var_118 = tensorAdd(var_114, var_117); - void* var_119 = tensorRelu(var_118); - void* var_120 = tensorConvolution(var_119, conv2d_29_w, 0, 0, 1, 1, 1, 1); - void* var_121 = tensorAdd(var_120, conv2d_29_b); - void* var_122 = tensorBatchNorm(var_121, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); - void* var_123 = tensorRelu(var_122); - void* var_124 = tensorConvolution(var_123, conv2d_30_w, 1, 1, 1, 1, 1, 1); - void* var_125 = tensorAdd(var_124, conv2d_30_b); - void* var_126 = tensorBatchNorm(var_125, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); - void* var_127 = tensorRelu(var_126); - void* var_128 = tensorConvolution(var_127, conv2d_31_w, 0, 0, 1, 1, 1, 1); - void* var_129 = tensorAdd(var_128, conv2d_31_b); - void* var_130 = tensorBatchNorm(var_129, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); - void* var_131 = tensorAdd(var_130, var_119); - void* var_132 = tensorRelu(var_131); - void* var_133 = tensorConvolution(var_132, conv2d_32_w, 0, 0, 1, 1, 1, 1); - void* var_134 = tensorAdd(var_133, conv2d_32_b); - void* var_135 = tensorBatchNorm(var_134, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 0.001); - void* var_136 = tensorRelu(var_135); - void* var_137 = tensorConvolution(var_136, conv2d_33_w, 1, 1, 1, 1, 1, 1); - void* var_138 = tensorAdd(var_137, conv2d_33_b); - void* var_139 = tensorBatchNorm(var_138, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); - void* var_140 = tensorRelu(var_139); - void* var_141 = tensorConvolution(var_140, conv2d_34_w, 0, 0, 1, 1, 1, 1); - void* var_142 = tensorAdd(var_141, conv2d_34_b); - void* var_143 = tensorBatchNorm(var_142, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); - void* var_144 = tensorAdd(var_143, var_132); - void* var_145 = tensorRelu(var_144); - void* var_146 = tensorConvolution(var_145, conv2d_35_w, 0, 0, 1, 1, 1, 1); - void* var_147 = tensorAdd(var_146, conv2d_35_b); - void* var_148 = tensorBatchNorm(var_147, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); - void* var_149 = tensorRelu(var_148); - void* var_150 = tensorConvolution(var_149, conv2d_36_w, 1, 1, 1, 1, 1, 1); - void* var_151 = tensorAdd(var_150, conv2d_36_b); - void* var_152 = tensorBatchNorm(var_151, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); - void* var_153 = tensorRelu(var_152); - void* var_154 = tensorConvolution(var_153, conv2d_37_w, 0, 0, 1, 1, 1, 1); - void* var_155 = tensorAdd(var_154, conv2d_37_b); - void* var_156 = tensorBatchNorm(var_155, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 0.001); - void* var_157 = tensorAdd(var_156, var_145); - void* var_158 = 
tensorRelu(var_157); - void* var_159 = tensorConvolution(var_158, conv2d_38_w, 0, 0, 1, 1, 1, 1); - void* var_160 = tensorAdd(var_159, conv2d_38_b); - void* var_161 = tensorBatchNorm(var_160, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); - void* var_162 = tensorRelu(var_161); - void* var_163 = tensorConvolution(var_162, conv2d_39_w, 1, 1, 1, 1, 1, 1); - void* var_164 = tensorAdd(var_163, conv2d_39_b); - void* var_165 = tensorBatchNorm(var_164, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); - void* var_166 = tensorRelu(var_165); - void* var_167 = tensorConvolution(var_166, conv2d_40_w, 0, 0, 1, 1, 1, 1); - void* var_168 = tensorAdd(var_167, conv2d_40_b); - void* var_169 = tensorBatchNorm(var_168, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); - void* var_170 = tensorAdd(var_169, var_158); - void* var_171 = tensorRelu(var_170); - void* var_172 = tensorConvolution(var_171, conv2d_41_w, 0, 0, 1, 1, 1, 1); - void* var_173 = tensorAdd(var_172, conv2d_41_b); - void* var_174 = tensorBatchNorm(var_173, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); - void* var_175 = tensorRelu(var_174); - void* var_176 = tensorConvolution(var_175, conv2d_42_w, 1, 1, 1, 1, 1, 1); - void* var_177 = tensorAdd(var_176, conv2d_42_b); - void* var_178 = tensorBatchNorm(var_177, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); - void* var_179 = tensorRelu(var_178); - void* var_180 = tensorConvolution(var_179, conv2d_43_w, 0, 0, 1, 1, 1, 1); - void* var_181 = tensorAdd(var_180, conv2d_43_b); - void* var_182 = tensorBatchNorm(var_181, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); - void* var_183 = tensorAdd(var_182, var_171); - void* var_184 = tensorRelu(var_183); - void* var_185 = tensorConvolution(var_184, conv2d_44_w, 0, 0, 2, 2, 1, 1); - void* var_186 = tensorAdd(var_185, conv2d_44_b); - void* var_187 = tensorBatchNorm(var_186, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); - void* var_188 = tensorRelu(var_187); - void* var_189 = tensorConvolution(var_188, conv2d_45_w, 1, 1, 1, 1, 1, 1); - void* var_190 = tensorAdd(var_189, conv2d_45_b); - void* var_191 = tensorBatchNorm(var_190, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, batch_normalization_45_variance, 0.001); - void* var_192 = tensorRelu(var_191); - void* var_193 = tensorConvolution(var_192, conv2d_46_w, 0, 0, 1, 1, 1, 1); - void* var_194 = tensorAdd(var_193, conv2d_46_b); - void* var_195 = tensorBatchNorm(var_194, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); - void* var_196 = tensorConvolution(var_184, conv2d_47_w, 0, 0, 2, 2, 1, 1); - void* var_197 = tensorAdd(var_196, conv2d_47_b); - void* var_198 = tensorBatchNorm(var_197, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); - void* var_199 = tensorAdd(var_195, var_198); - void* var_200 = tensorRelu(var_199); - void* var_201 = 
tensorConvolution(var_200, conv2d_48_w, 0, 0, 1, 1, 1, 1); - void* var_202 = tensorAdd(var_201, conv2d_48_b); - void* var_203 = tensorBatchNorm(var_202, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); - void* var_204 = tensorRelu(var_203); - void* var_205 = tensorConvolution(var_204, conv2d_49_w, 1, 1, 1, 1, 1, 1); - void* var_206 = tensorAdd(var_205, conv2d_49_b); - void* var_207 = tensorBatchNorm(var_206, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); - void* var_208 = tensorRelu(var_207); - void* var_209 = tensorConvolution(var_208, conv2d_50_w, 0, 0, 1, 1, 1, 1); - void* var_210 = tensorAdd(var_209, conv2d_50_b); - void* var_211 = tensorBatchNorm(var_210, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); - void* var_212 = tensorAdd(var_211, var_200); - void* var_213 = tensorRelu(var_212); - void* var_214 = tensorConvolution(var_213, conv2d_51_w, 0, 0, 1, 1, 1, 1); - void* var_215 = tensorAdd(var_214, conv2d_51_b); - void* var_216 = tensorBatchNorm(var_215, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); - void* var_217 = tensorRelu(var_216); - void* var_218 = tensorConvolution(var_217, conv2d_52_w, 1, 1, 1, 1, 1, 1); - void* var_219 = tensorAdd(var_218, conv2d_52_b); - void* var_220 = tensorBatchNorm(var_219, batch_normalization_52_gamma, batch_normalization_52_beta, batch_normalization_52_mean, batch_normalization_52_variance, 0.001); - void* var_221 = tensorRelu(var_220); - void* var_222 = tensorConvolution(var_221, conv2d_53_w, 0, 0, 1, 1, 1, 1); - void* var_223 = tensorAdd(var_222, conv2d_53_b); - void* var_224 = tensorBatchNorm(var_223, batch_normalization_53_gamma, batch_normalization_53_beta, batch_normalization_53_mean, batch_normalization_53_variance, 0.001); - void* var_225 = tensorAdd(var_224, var_213); - void* var_226 = tensorRelu(var_225); - void* var_227 = tensorPooling(var_226,1,7,7,0,0,7,7); - void* var_229 = tensorGemmGPU(var_227, dense_1_w); - void* var_230 = tensorAdd(var_229, dense_1_b); - void* var_231 = tensorSoftmax(var_230); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_231); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc deleted file mode 100644 index b694f007b2e1c4cbe71bbe53c1065888542b23f1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline.cc +++ /dev/null @@ -1,161 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Outline - 
Motion Blur - Emboss ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - //long int test_batch_size = 9145; - long int test_batch_size = 2000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/caltech101_255_float32.bin", - float_type, - test_batch_size, 1, H, W); - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-G-O-M-E-FP32-clipped-2000.bin", - //void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-gaussian.bin", - float_type, - test_batch_size, 1, H, W); - - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* outline_out = tensorConvolution(gaussian_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * outline_out_clip = tensorRelu2(outline_out, 0, 255); - - void* motionblur_out = tensorConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255); - - void* emboss_out = tensorConvolution(motionblur_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias); - void* result = tensorRelu2(emboss_bias_out, 0, 255); - //void* result = gaussian_out; - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - dumpOutput(result); - - //void* psnr_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-gaussian.bin", - //float_type, - //test_batch_size, 1, H, W); - computePSNRViolation(result, golden_output, 30); - - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc deleted file mode 100644 index 199a29a73af1a98fac31ae55f93c8bc8e7e2d6d4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEMO.cc +++ /dev/null @@ -1,153 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n"); - - //long int test_batch_size = 9145; - long int test_batch_size = 1000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = 
readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - - - //printTensorValues(input); - //printTensorValues(motionblur_filter); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255); - - void* motionblur_out = tensorConvolution(emboss_bias_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255); - - void* outline_out = tensorConvolution(motionblur_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void* result = tensorRelu2(outline_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEMO_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc deleted file mode 100644 index c2d6d1fb4dd3f8e0fe25db4f2628700a60aa44da..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEO.cc +++ /dev/null @@ -1,147 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Emboss 
- Outline ********** \n"); - - //long int test_batch_size = 9145; - long int test_batch_size = 1000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255); - - void* outline_out = tensorConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * result = tensorRelu2(outline_out, 0, 255); - - //void* result = gaussian_out; - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEO_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc deleted file mode 100644 index 9c219f4daac2af708f02a64d97bf84ae36047316..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GEOM.cc +++ /dev/null @@ -1,151 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n"); - //long int test_batch_size = 9145; - long int test_batch_size = 1000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = 
readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* emboss_out = tensorConvolution(gaussian_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias); - void* emboss_bias_out_clip = tensorRelu2(emboss_bias_out, 0, 255); - - void* outline_out = tensorConvolution(emboss_bias_out_clip, outline_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * outline_out_clip = tensorRelu2(outline_out, 0, 255); - - void* motionblur_out = tensorConvolution(outline_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * result = tensorRelu2(motionblur_out, 0, 255); - - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GEOM_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc deleted file mode 100644 index 721eb887ca0a6cd9e6cb5c992f7c0559716a1259..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSM.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n"); - - //long int test_batch_size = 9145; - long int test_batch_size = 1000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - printf("Reading golden output\n"); - void* golden_output = 
readTrainedWeights("../model_params/pipeline/golden_output/GSM_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* sharpen_out = tensorConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * sharpen_out_clip = tensorRelu2(sharpen_out, 0, 255); - - void* motionblur_out = tensorConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * result = tensorRelu2(motionblur_out, 0, 255); - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GSM_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc deleted file mode 100644 index 19f0210e83939568ae2ca9b198ccfa95be70113e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/image_benchs_oopsla19/pipeline_GSME.cc +++ /dev/null @@ -1,153 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include 
<fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - //long int test_batch_size = 9145; - long int test_batch_size = 1000; - long int H = 240; - long int W = 300; - - printf("Reading input\n"); - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - printf("Reading golden output\n"); - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_calib.bin", - float_type, - test_batch_size, 1, H, W); - - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* gaussian_out = tensorConvolution(input, gaussian_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * gaussian_out_clip = tensorRelu2(gaussian_out, 0, 255); - - void* sharpen_out = tensorConvolution(gaussian_out_clip, sharpen_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - void * sharpen_out_clip = tensorRelu2(sharpen_out, 0, 255); - - void* motionblur_out = tensorConvolution(sharpen_out_clip, motionblur_filter, 4, 4, 1, 1, - conv_mode, conv_precision); - void * motionblur_out_clip = tensorRelu2(motionblur_out, 0, 255); - - void* emboss_out = tensorConvolution(motionblur_out_clip, emboss_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - void* emboss_bias_out = tensorAdd(emboss_out, emboss_bias); - void* result = tensorRelu2(emboss_bias_out, 0, 255); - - - //void* result = gaussian_out; - - // NOTE-IMP: Important to include this call always before doing dumpOutput and computePSNViolation - hpvm_request_tensor(result, 0); - - //dumpOutput(result, "GSME_calib.bin"); - - computePSNRViolation(result, golden_output, 30); - - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc deleted file mode 100644 index ac0d727f39df27763fb964d3846a39a4436ba2ef..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/alexnet_cifar10_layers.cc +++ /dev/null @@ -1,156 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 2; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = 
readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - - - void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias, - 5, 5, 1, 1, 0, 2, 0, -1,1); - - void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, conv2_bias, - 2, 2, 1, 1, 0, 2, 0, -1,1); - - void* conv3_out = ConvLayer_GPU(conv2_out, conv3_filter, conv3_bias, - 1, 1, 1, 1, 0, 0, 0, -1,1); - - void* conv4_out = ConvLayer_GPU(conv3_out, conv4_filter, conv4_bias, - 1, 1, 1, 1, 0, 0, 0, -1,1); - - void* conv5_out = ConvLayer_GPU(conv4_out, conv5_filter, conv5_bias, - 1, 1, 1, 1, 0, 2, 0, -1,1); - - void* fc1_out = FCLayer_GPU(conv5_out, fc1_weights, fc1_bias, -1, -1,1); - - void* result = tensorSoftmax(fc1_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc deleted file mode 100644 index 77b75add2bf858d56dcb2d427958bf0ea5ff20a0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers.cc +++ /dev/null @@ -1,146 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 10; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int 
test_batch_size = 10000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - - void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias, - 2, 2, 1, 1, 0, 2, 0, -1,1); - - void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, - conv2_bias, - 2, 2, 1, 1, 0, 2, 0, -1,1); - - void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias, - 0, -1,1); - - void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, - 0, -1,1); - - void* result = tensorSoftmax(fc2_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc deleted file mode 100644 index c1345ff24083a0ce20f3274afc74916968be4c06..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/lenet_layers2.cc +++ /dev/null @@ -1,141 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> 
-#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_keras2/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_keras2/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_keras2/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_keras2/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_keras2/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_keras2/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_keras2/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_keras2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - if(Opentuner_run){ - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - // Start power and performance profiling - startProfiling(); - - void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias, - 2, 2, 1, 1, 0, 2, 0, -1,1); - - void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, - conv2_bias, - 2, 2, 1, 1, 0, 2, 0, -1,1); - - void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias, - 0, -1,1); - - void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, - 0, -1,1); - - void* result = tensorSoftmax(fc2_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc deleted file mode 100644 index df663a81759f9e096e067859f8aa487882d8835f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers.cc +++ /dev/null @@ -1,148 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Test Layer source ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - //void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", - // float_type, 32, 1, 5, 5); - //void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", - // float_type, 1, 32, 1, 1); - //void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", - // float_type, 64, 32, 5, 5); - //void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", - // float_type, 1, 64, 1, 1); - //void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", - // float_type, 1, 1, 7*7*64, 1024); - //void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", - // float_type, 1, 1024, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/test_keras/fc1.bin", - float_type, 1, 1, 784, 500); - void* fc1_bias = readTrainedWeights("../model_params/test_keras/fc1_bias.bin", - float_type, 1, 500, 1, 1); - - void* fc2_weights = readTrainedWeights("../model_params/test_keras/fc2.bin", - float_type, 1, 1, 500, 10); - void* fc2_bias = readTrainedWeights("../model_params/test_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - - //- void* conv1_out = ConvLayer_GPU(input, conv1_filter, conv1_bias, - // 2, 2, 1, 1, 0, 2, 0, -1,1); - - //void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, - // conv2_bias, - // 2, 2, 1, 1, 0, 2, 0, -1,1); - - //void* fc1_out = FCLayer_GPU(conv2_out, fc1_weights, fc1_bias, - // 0, -1,1); - - void* fc1_out = FCLayer_GPU(input, fc1_weights, fc1_bias, 0, -1,1); - - void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 0, -1,1); - - void* result = tensorSoftmax(fc2_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - 
freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc deleted file mode 100644 index 168025d42579e7b2bced6d7c34866e7c275cd739..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/layers/test_layers2.cc +++ /dev/null @@ -1,155 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - - int test_batch_size = 10000; - - uint8_t* labels = readLabels("../model_params/test_keras/test_labels.bin", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readTrainedWeights("../model_params/cifar_keras/input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/cifar10/conv2.bin", - float_type, 64, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/cifar10/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/cifar10/conv3.bin", - float_type, 128, 64, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/cifar10/conv3_bias.bin", - float_type, 1, 128, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/cifar10/conv4.bin", - float_type, 128, 128, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/cifar10/conv4_bias.bin", - float_type, 1, 128, 1, 1); - - - void* fc1_weights = readTrainedWeights("../model_params/test_keras/cifar_fc1.bin", - float_type, 1, 1, 3*32*32, 10); - void* fc1_bias = readTrainedWeights("../model_params/test_keras/cifar_fc1_bias.bin", - float_type, 1, 10, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/test_keras/cifar_fc2.bin", - float_type, 1, 1, 500, 10); - void* fc2_bias = readTrainedWeights("../model_params/test_keras/cifar_fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - // Start power and performance profiling - startProfiling(); - - /* - void* conv1_out = 
ConvLayer_GPU(input, conv1_filter, conv1_bias, - 1, 1, 1, 1, 0, 0, 0, -1,1); - - void* conv2_out = ConvLayer_GPU(conv1_out, conv2_filter, conv2_bias, - 1, 1, 1, 1, 0, 2, 0, -1,1); - - void* conv3_out = ConvLayer_GPU(conv2_out, conv3_filter, conv3_bias, - 1, 1, 1, 1, 0, 2, 0, -1,1); - - void* conv4_out = ConvLayer_GPU(conv3_out, conv4_filter, conv4_bias, - 1, 1, 1, 1, 0, 2, 0, -1,1); - */ - - void* fc1_out = FCLayer_GPU(input, fc1_weights, NULL, -1, -1,1); - //-- void* fc1_out = tensorGemmGPU(input, fc1_weights); - - - //void* fc2_out = FCLayer_GPU(fc1_out, fc2_weights, fc2_bias, 0, -1,1); - - void* result = tensorSoftmax(fc1_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc deleted file mode 100644 index 7c9583f291ea908c4c89a8b56045e06585a4f83a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/lenet_perf.cc +++ /dev/null @@ -1,185 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - -int total_runs = 1; - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 1000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", 
- float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - //void* conv1out = tensorConvPerfCuda(input, conv1_filter, 2, 2, 1, 1, - // conv_mode, conv_precision, 2, 2, 1); - - void* conv1out = tensorConvSampSim(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision, 4, 0); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - - void* conv1_tanh = tensorTanh(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - //void* conv2out = tensorConvPerfCuda(conv1_tanh, conv2_filter, 2, 2, 1, 1, - // conv_mode, conv_precision, 1, 2, 1); - - void* conv2out = tensorConvSampSim(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision, 2, 0); - - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - - void* conv2_tanh = tensorTanh(pool2out); - - void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorTanh(gemm1biasout); - - void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - float accuracy = computeAccuracy2(labels, test_batch_size, result); - dumpFinalAccuracy(accuracy); - - - //FIXME: remove the comment below to use piped autotuner - //dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - dumpExecutionAccuracies(); - - -} - - - -int main(int argc, char* argv[]){ - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc deleted file mode 100644 index fcbb17f411adaf5e46162a0524efc97c90174506..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/mobilenetv2_cifar10.cc +++ /dev/null @@ -1,721 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> 
-#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenetv2_quant/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,32,1,1); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,16,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,16,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,16,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,96,16,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + 
std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,96,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,96,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,96,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,24,96,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,24,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,144,24,1,1); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,144,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,144,1,3,3); - 
std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,144,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,24,144,1,1); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,24,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,24,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,144,24,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,144,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,144,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* 
batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,144,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,144,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,144,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,192,32,1,1); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,192,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,192,1,3,3); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 
0,1,192,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,192,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,192,1,1); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,192,32,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,192,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,192,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,192,1,1); - std::string conv2d_12_w_path = dir_prefix + 
std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,192,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,192,32,1,1); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,192,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,192,1,3,3); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,192,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,192,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,64,192,1,1); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = 
readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,384,64,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,384,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,384,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,384,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,384,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,64,1,1); - 
std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,384,64,1,1); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,384,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,384,1,3,3); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,384,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,384,1,1); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + 
std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,384,64,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,384,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,384,1,3,3); - std::string batch_normalization_28_gamma_path = dir_prefix + std::string("batch_normalization_28_gamma.bin"); - void* batch_normalization_28_gamma = readTrainedWeights(batch_normalization_28_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_28_beta_path = dir_prefix + std::string("batch_normalization_28_beta.bin"); - void* batch_normalization_28_beta = readTrainedWeights(batch_normalization_28_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_28_mean_path = dir_prefix + std::string("batch_normalization_28_mean.bin"); - void* batch_normalization_28_mean = readTrainedWeights(batch_normalization_28_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_28_variance_path = dir_prefix + std::string("batch_normalization_28_variance.bin"); - void* batch_normalization_28_variance = readTrainedWeights(batch_normalization_28_variance_path.c_str(), 0,1,384,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,384,1,1); - std::string batch_normalization_29_gamma_path = dir_prefix + std::string("batch_normalization_29_gamma.bin"); - void* batch_normalization_29_gamma = readTrainedWeights(batch_normalization_29_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_29_beta_path = dir_prefix + std::string("batch_normalization_29_beta.bin"); - void* batch_normalization_29_beta = readTrainedWeights(batch_normalization_29_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_29_mean_path = dir_prefix + std::string("batch_normalization_29_mean.bin"); - void* batch_normalization_29_mean = readTrainedWeights(batch_normalization_29_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_29_variance_path = dir_prefix + std::string("batch_normalization_29_variance.bin"); - void* batch_normalization_29_variance = readTrainedWeights(batch_normalization_29_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = 
readTrainedWeights(conv2d_21_w_path.c_str(), 0,384,64,1,1); - std::string batch_normalization_30_gamma_path = dir_prefix + std::string("batch_normalization_30_gamma.bin"); - void* batch_normalization_30_gamma = readTrainedWeights(batch_normalization_30_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_30_beta_path = dir_prefix + std::string("batch_normalization_30_beta.bin"); - void* batch_normalization_30_beta = readTrainedWeights(batch_normalization_30_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_30_mean_path = dir_prefix + std::string("batch_normalization_30_mean.bin"); - void* batch_normalization_30_mean = readTrainedWeights(batch_normalization_30_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_30_variance_path = dir_prefix + std::string("batch_normalization_30_variance.bin"); - void* batch_normalization_30_variance = readTrainedWeights(batch_normalization_30_variance_path.c_str(), 0,1,384,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,384,1,3,3); - std::string batch_normalization_31_gamma_path = dir_prefix + std::string("batch_normalization_31_gamma.bin"); - void* batch_normalization_31_gamma = readTrainedWeights(batch_normalization_31_gamma_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_31_beta_path = dir_prefix + std::string("batch_normalization_31_beta.bin"); - void* batch_normalization_31_beta = readTrainedWeights(batch_normalization_31_beta_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_31_mean_path = dir_prefix + std::string("batch_normalization_31_mean.bin"); - void* batch_normalization_31_mean = readTrainedWeights(batch_normalization_31_mean_path.c_str(), 0,1,384,1,1); - std::string batch_normalization_31_variance_path = dir_prefix + std::string("batch_normalization_31_variance.bin"); - void* batch_normalization_31_variance = readTrainedWeights(batch_normalization_31_variance_path.c_str(), 0,1,384,1,1); - std::string conv2d_22_w_path = dir_prefix + std::string("conv2d_22_w.bin"); - void* conv2d_22_w = readTrainedWeights(conv2d_22_w_path.c_str(), 0,96,384,1,1); - std::string batch_normalization_32_gamma_path = dir_prefix + std::string("batch_normalization_32_gamma.bin"); - void* batch_normalization_32_gamma = readTrainedWeights(batch_normalization_32_gamma_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_32_beta_path = dir_prefix + std::string("batch_normalization_32_beta.bin"); - void* batch_normalization_32_beta = readTrainedWeights(batch_normalization_32_beta_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_32_mean_path = dir_prefix + std::string("batch_normalization_32_mean.bin"); - void* batch_normalization_32_mean = readTrainedWeights(batch_normalization_32_mean_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_32_variance_path = dir_prefix + std::string("batch_normalization_32_variance.bin"); - void* batch_normalization_32_variance = readTrainedWeights(batch_normalization_32_variance_path.c_str(), 0,1,96,1,1); - std::string conv2d_23_w_path = dir_prefix + std::string("conv2d_23_w.bin"); - void* conv2d_23_w = readTrainedWeights(conv2d_23_w_path.c_str(), 0,576,96,1,1); - std::string batch_normalization_33_gamma_path = dir_prefix + std::string("batch_normalization_33_gamma.bin"); - void* batch_normalization_33_gamma = readTrainedWeights(batch_normalization_33_gamma_path.c_str(), 0,1,576,1,1); - std::string 
batch_normalization_33_beta_path = dir_prefix + std::string("batch_normalization_33_beta.bin"); - void* batch_normalization_33_beta = readTrainedWeights(batch_normalization_33_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_33_mean_path = dir_prefix + std::string("batch_normalization_33_mean.bin"); - void* batch_normalization_33_mean = readTrainedWeights(batch_normalization_33_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_33_variance_path = dir_prefix + std::string("batch_normalization_33_variance.bin"); - void* batch_normalization_33_variance = readTrainedWeights(batch_normalization_33_variance_path.c_str(), 0,1,576,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,576,1,3,3); - std::string batch_normalization_34_gamma_path = dir_prefix + std::string("batch_normalization_34_gamma.bin"); - void* batch_normalization_34_gamma = readTrainedWeights(batch_normalization_34_gamma_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_34_beta_path = dir_prefix + std::string("batch_normalization_34_beta.bin"); - void* batch_normalization_34_beta = readTrainedWeights(batch_normalization_34_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_34_mean_path = dir_prefix + std::string("batch_normalization_34_mean.bin"); - void* batch_normalization_34_mean = readTrainedWeights(batch_normalization_34_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_34_variance_path = dir_prefix + std::string("batch_normalization_34_variance.bin"); - void* batch_normalization_34_variance = readTrainedWeights(batch_normalization_34_variance_path.c_str(), 0,1,576,1,1); - std::string conv2d_24_w_path = dir_prefix + std::string("conv2d_24_w.bin"); - void* conv2d_24_w = readTrainedWeights(conv2d_24_w_path.c_str(), 0,96,576,1,1); - std::string batch_normalization_35_gamma_path = dir_prefix + std::string("batch_normalization_35_gamma.bin"); - void* batch_normalization_35_gamma = readTrainedWeights(batch_normalization_35_gamma_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_35_beta_path = dir_prefix + std::string("batch_normalization_35_beta.bin"); - void* batch_normalization_35_beta = readTrainedWeights(batch_normalization_35_beta_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_35_mean_path = dir_prefix + std::string("batch_normalization_35_mean.bin"); - void* batch_normalization_35_mean = readTrainedWeights(batch_normalization_35_mean_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_35_variance_path = dir_prefix + std::string("batch_normalization_35_variance.bin"); - void* batch_normalization_35_variance = readTrainedWeights(batch_normalization_35_variance_path.c_str(), 0,1,96,1,1); - std::string conv2d_25_w_path = dir_prefix + std::string("conv2d_25_w.bin"); - void* conv2d_25_w = readTrainedWeights(conv2d_25_w_path.c_str(), 0,576,96,1,1); - std::string batch_normalization_36_gamma_path = dir_prefix + std::string("batch_normalization_36_gamma.bin"); - void* batch_normalization_36_gamma = readTrainedWeights(batch_normalization_36_gamma_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_36_beta_path = dir_prefix + std::string("batch_normalization_36_beta.bin"); - void* batch_normalization_36_beta = readTrainedWeights(batch_normalization_36_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_36_mean_path = dir_prefix + 
std::string("batch_normalization_36_mean.bin"); - void* batch_normalization_36_mean = readTrainedWeights(batch_normalization_36_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_36_variance_path = dir_prefix + std::string("batch_normalization_36_variance.bin"); - void* batch_normalization_36_variance = readTrainedWeights(batch_normalization_36_variance_path.c_str(), 0,1,576,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,576,1,3,3); - std::string batch_normalization_37_gamma_path = dir_prefix + std::string("batch_normalization_37_gamma.bin"); - void* batch_normalization_37_gamma = readTrainedWeights(batch_normalization_37_gamma_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_37_beta_path = dir_prefix + std::string("batch_normalization_37_beta.bin"); - void* batch_normalization_37_beta = readTrainedWeights(batch_normalization_37_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_37_mean_path = dir_prefix + std::string("batch_normalization_37_mean.bin"); - void* batch_normalization_37_mean = readTrainedWeights(batch_normalization_37_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_37_variance_path = dir_prefix + std::string("batch_normalization_37_variance.bin"); - void* batch_normalization_37_variance = readTrainedWeights(batch_normalization_37_variance_path.c_str(), 0,1,576,1,1); - std::string conv2d_26_w_path = dir_prefix + std::string("conv2d_26_w.bin"); - void* conv2d_26_w = readTrainedWeights(conv2d_26_w_path.c_str(), 0,96,576,1,1); - std::string batch_normalization_38_gamma_path = dir_prefix + std::string("batch_normalization_38_gamma.bin"); - void* batch_normalization_38_gamma = readTrainedWeights(batch_normalization_38_gamma_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_38_beta_path = dir_prefix + std::string("batch_normalization_38_beta.bin"); - void* batch_normalization_38_beta = readTrainedWeights(batch_normalization_38_beta_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_38_mean_path = dir_prefix + std::string("batch_normalization_38_mean.bin"); - void* batch_normalization_38_mean = readTrainedWeights(batch_normalization_38_mean_path.c_str(), 0,1,96,1,1); - std::string batch_normalization_38_variance_path = dir_prefix + std::string("batch_normalization_38_variance.bin"); - void* batch_normalization_38_variance = readTrainedWeights(batch_normalization_38_variance_path.c_str(), 0,1,96,1,1); - std::string conv2d_27_w_path = dir_prefix + std::string("conv2d_27_w.bin"); - void* conv2d_27_w = readTrainedWeights(conv2d_27_w_path.c_str(), 0,576,96,1,1); - std::string batch_normalization_39_gamma_path = dir_prefix + std::string("batch_normalization_39_gamma.bin"); - void* batch_normalization_39_gamma = readTrainedWeights(batch_normalization_39_gamma_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_39_beta_path = dir_prefix + std::string("batch_normalization_39_beta.bin"); - void* batch_normalization_39_beta = readTrainedWeights(batch_normalization_39_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_39_mean_path = dir_prefix + std::string("batch_normalization_39_mean.bin"); - void* batch_normalization_39_mean = readTrainedWeights(batch_normalization_39_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_39_variance_path = dir_prefix + std::string("batch_normalization_39_variance.bin"); - void* 
batch_normalization_39_variance = readTrainedWeights(batch_normalization_39_variance_path.c_str(), 0,1,576,1,1); - std::string depthwise_conv2d_14_w_path = dir_prefix + std::string("depthwise_conv2d_14_w.bin"); - void* depthwise_conv2d_14_w = readTrainedWeights(depthwise_conv2d_14_w_path.c_str(), 0,576,1,3,3); - std::string batch_normalization_40_gamma_path = dir_prefix + std::string("batch_normalization_40_gamma.bin"); - void* batch_normalization_40_gamma = readTrainedWeights(batch_normalization_40_gamma_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_40_beta_path = dir_prefix + std::string("batch_normalization_40_beta.bin"); - void* batch_normalization_40_beta = readTrainedWeights(batch_normalization_40_beta_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_40_mean_path = dir_prefix + std::string("batch_normalization_40_mean.bin"); - void* batch_normalization_40_mean = readTrainedWeights(batch_normalization_40_mean_path.c_str(), 0,1,576,1,1); - std::string batch_normalization_40_variance_path = dir_prefix + std::string("batch_normalization_40_variance.bin"); - void* batch_normalization_40_variance = readTrainedWeights(batch_normalization_40_variance_path.c_str(), 0,1,576,1,1); - std::string conv2d_28_w_path = dir_prefix + std::string("conv2d_28_w.bin"); - void* conv2d_28_w = readTrainedWeights(conv2d_28_w_path.c_str(), 0,160,576,1,1); - std::string batch_normalization_41_gamma_path = dir_prefix + std::string("batch_normalization_41_gamma.bin"); - void* batch_normalization_41_gamma = readTrainedWeights(batch_normalization_41_gamma_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_41_beta_path = dir_prefix + std::string("batch_normalization_41_beta.bin"); - void* batch_normalization_41_beta = readTrainedWeights(batch_normalization_41_beta_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_41_mean_path = dir_prefix + std::string("batch_normalization_41_mean.bin"); - void* batch_normalization_41_mean = readTrainedWeights(batch_normalization_41_mean_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_41_variance_path = dir_prefix + std::string("batch_normalization_41_variance.bin"); - void* batch_normalization_41_variance = readTrainedWeights(batch_normalization_41_variance_path.c_str(), 0,1,160,1,1); - std::string conv2d_29_w_path = dir_prefix + std::string("conv2d_29_w.bin"); - void* conv2d_29_w = readTrainedWeights(conv2d_29_w_path.c_str(), 0,960,160,1,1); - std::string batch_normalization_42_gamma_path = dir_prefix + std::string("batch_normalization_42_gamma.bin"); - void* batch_normalization_42_gamma = readTrainedWeights(batch_normalization_42_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_42_beta_path = dir_prefix + std::string("batch_normalization_42_beta.bin"); - void* batch_normalization_42_beta = readTrainedWeights(batch_normalization_42_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_42_mean_path = dir_prefix + std::string("batch_normalization_42_mean.bin"); - void* batch_normalization_42_mean = readTrainedWeights(batch_normalization_42_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_42_variance_path = dir_prefix + std::string("batch_normalization_42_variance.bin"); - void* batch_normalization_42_variance = readTrainedWeights(batch_normalization_42_variance_path.c_str(), 0,1,960,1,1); - std::string depthwise_conv2d_15_w_path = dir_prefix + std::string("depthwise_conv2d_15_w.bin"); - void* depthwise_conv2d_15_w = readTrainedWeights(depthwise_conv2d_15_w_path.c_str(), 
0,960,1,3,3); - std::string batch_normalization_43_gamma_path = dir_prefix + std::string("batch_normalization_43_gamma.bin"); - void* batch_normalization_43_gamma = readTrainedWeights(batch_normalization_43_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_43_beta_path = dir_prefix + std::string("batch_normalization_43_beta.bin"); - void* batch_normalization_43_beta = readTrainedWeights(batch_normalization_43_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_43_mean_path = dir_prefix + std::string("batch_normalization_43_mean.bin"); - void* batch_normalization_43_mean = readTrainedWeights(batch_normalization_43_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_43_variance_path = dir_prefix + std::string("batch_normalization_43_variance.bin"); - void* batch_normalization_43_variance = readTrainedWeights(batch_normalization_43_variance_path.c_str(), 0,1,960,1,1); - std::string conv2d_30_w_path = dir_prefix + std::string("conv2d_30_w.bin"); - void* conv2d_30_w = readTrainedWeights(conv2d_30_w_path.c_str(), 0,160,960,1,1); - std::string batch_normalization_44_gamma_path = dir_prefix + std::string("batch_normalization_44_gamma.bin"); - void* batch_normalization_44_gamma = readTrainedWeights(batch_normalization_44_gamma_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_44_beta_path = dir_prefix + std::string("batch_normalization_44_beta.bin"); - void* batch_normalization_44_beta = readTrainedWeights(batch_normalization_44_beta_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_44_mean_path = dir_prefix + std::string("batch_normalization_44_mean.bin"); - void* batch_normalization_44_mean = readTrainedWeights(batch_normalization_44_mean_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_44_variance_path = dir_prefix + std::string("batch_normalization_44_variance.bin"); - void* batch_normalization_44_variance = readTrainedWeights(batch_normalization_44_variance_path.c_str(), 0,1,160,1,1); - std::string conv2d_31_w_path = dir_prefix + std::string("conv2d_31_w.bin"); - void* conv2d_31_w = readTrainedWeights(conv2d_31_w_path.c_str(), 0,960,160,1,1); - std::string batch_normalization_45_gamma_path = dir_prefix + std::string("batch_normalization_45_gamma.bin"); - void* batch_normalization_45_gamma = readTrainedWeights(batch_normalization_45_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_45_beta_path = dir_prefix + std::string("batch_normalization_45_beta.bin"); - void* batch_normalization_45_beta = readTrainedWeights(batch_normalization_45_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_45_mean_path = dir_prefix + std::string("batch_normalization_45_mean.bin"); - void* batch_normalization_45_mean = readTrainedWeights(batch_normalization_45_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_45_variance_path = dir_prefix + std::string("batch_normalization_45_variance.bin"); - void* batch_normalization_45_variance = readTrainedWeights(batch_normalization_45_variance_path.c_str(), 0,1,960,1,1); - std::string depthwise_conv2d_16_w_path = dir_prefix + std::string("depthwise_conv2d_16_w.bin"); - void* depthwise_conv2d_16_w = readTrainedWeights(depthwise_conv2d_16_w_path.c_str(), 0,960,1,3,3); - std::string batch_normalization_46_gamma_path = dir_prefix + std::string("batch_normalization_46_gamma.bin"); - void* batch_normalization_46_gamma = readTrainedWeights(batch_normalization_46_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_46_beta_path = 
dir_prefix + std::string("batch_normalization_46_beta.bin"); - void* batch_normalization_46_beta = readTrainedWeights(batch_normalization_46_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_46_mean_path = dir_prefix + std::string("batch_normalization_46_mean.bin"); - void* batch_normalization_46_mean = readTrainedWeights(batch_normalization_46_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_46_variance_path = dir_prefix + std::string("batch_normalization_46_variance.bin"); - void* batch_normalization_46_variance = readTrainedWeights(batch_normalization_46_variance_path.c_str(), 0,1,960,1,1); - std::string conv2d_32_w_path = dir_prefix + std::string("conv2d_32_w.bin"); - void* conv2d_32_w = readTrainedWeights(conv2d_32_w_path.c_str(), 0,160,960,1,1); - std::string batch_normalization_47_gamma_path = dir_prefix + std::string("batch_normalization_47_gamma.bin"); - void* batch_normalization_47_gamma = readTrainedWeights(batch_normalization_47_gamma_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_47_beta_path = dir_prefix + std::string("batch_normalization_47_beta.bin"); - void* batch_normalization_47_beta = readTrainedWeights(batch_normalization_47_beta_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_47_mean_path = dir_prefix + std::string("batch_normalization_47_mean.bin"); - void* batch_normalization_47_mean = readTrainedWeights(batch_normalization_47_mean_path.c_str(), 0,1,160,1,1); - std::string batch_normalization_47_variance_path = dir_prefix + std::string("batch_normalization_47_variance.bin"); - void* batch_normalization_47_variance = readTrainedWeights(batch_normalization_47_variance_path.c_str(), 0,1,160,1,1); - std::string conv2d_33_w_path = dir_prefix + std::string("conv2d_33_w.bin"); - void* conv2d_33_w = readTrainedWeights(conv2d_33_w_path.c_str(), 0,960,160,1,1); - std::string batch_normalization_48_gamma_path = dir_prefix + std::string("batch_normalization_48_gamma.bin"); - void* batch_normalization_48_gamma = readTrainedWeights(batch_normalization_48_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_48_beta_path = dir_prefix + std::string("batch_normalization_48_beta.bin"); - void* batch_normalization_48_beta = readTrainedWeights(batch_normalization_48_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_48_mean_path = dir_prefix + std::string("batch_normalization_48_mean.bin"); - void* batch_normalization_48_mean = readTrainedWeights(batch_normalization_48_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_48_variance_path = dir_prefix + std::string("batch_normalization_48_variance.bin"); - void* batch_normalization_48_variance = readTrainedWeights(batch_normalization_48_variance_path.c_str(), 0,1,960,1,1); - std::string depthwise_conv2d_17_w_path = dir_prefix + std::string("depthwise_conv2d_17_w.bin"); - void* depthwise_conv2d_17_w = readTrainedWeights(depthwise_conv2d_17_w_path.c_str(), 0,960,1,3,3); - std::string batch_normalization_49_gamma_path = dir_prefix + std::string("batch_normalization_49_gamma.bin"); - void* batch_normalization_49_gamma = readTrainedWeights(batch_normalization_49_gamma_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_49_beta_path = dir_prefix + std::string("batch_normalization_49_beta.bin"); - void* batch_normalization_49_beta = readTrainedWeights(batch_normalization_49_beta_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_49_mean_path = dir_prefix + std::string("batch_normalization_49_mean.bin"); - void* 
batch_normalization_49_mean = readTrainedWeights(batch_normalization_49_mean_path.c_str(), 0,1,960,1,1); - std::string batch_normalization_49_variance_path = dir_prefix + std::string("batch_normalization_49_variance.bin"); - void* batch_normalization_49_variance = readTrainedWeights(batch_normalization_49_variance_path.c_str(), 0,1,960,1,1); - std::string conv2d_34_w_path = dir_prefix + std::string("conv2d_34_w.bin"); - void* conv2d_34_w = readTrainedWeights(conv2d_34_w_path.c_str(), 0,320,960,1,1); - std::string batch_normalization_50_gamma_path = dir_prefix + std::string("batch_normalization_50_gamma.bin"); - void* batch_normalization_50_gamma = readTrainedWeights(batch_normalization_50_gamma_path.c_str(), 0,1,320,1,1); - std::string batch_normalization_50_beta_path = dir_prefix + std::string("batch_normalization_50_beta.bin"); - void* batch_normalization_50_beta = readTrainedWeights(batch_normalization_50_beta_path.c_str(), 0,1,320,1,1); - std::string batch_normalization_50_mean_path = dir_prefix + std::string("batch_normalization_50_mean.bin"); - void* batch_normalization_50_mean = readTrainedWeights(batch_normalization_50_mean_path.c_str(), 0,1,320,1,1); - std::string batch_normalization_50_variance_path = dir_prefix + std::string("batch_normalization_50_variance.bin"); - void* batch_normalization_50_variance = readTrainedWeights(batch_normalization_50_variance_path.c_str(), 0,1,320,1,1); - std::string conv2d_35_w_path = dir_prefix + std::string("conv2d_35_w.bin"); - void* conv2d_35_w = readTrainedWeights(conv2d_35_w_path.c_str(), 0,1280,320,1,1); - std::string batch_normalization_51_gamma_path = dir_prefix + std::string("batch_normalization_51_gamma.bin"); - void* batch_normalization_51_gamma = readTrainedWeights(batch_normalization_51_gamma_path.c_str(), 0,1,1280,1,1); - std::string batch_normalization_51_beta_path = dir_prefix + std::string("batch_normalization_51_beta.bin"); - void* batch_normalization_51_beta = readTrainedWeights(batch_normalization_51_beta_path.c_str(), 0,1,1280,1,1); - std::string batch_normalization_51_mean_path = dir_prefix + std::string("batch_normalization_51_mean.bin"); - void* batch_normalization_51_mean = readTrainedWeights(batch_normalization_51_mean_path.c_str(), 0,1,1280,1,1); - std::string batch_normalization_51_variance_path = dir_prefix + std::string("batch_normalization_51_variance.bin"); - void* batch_normalization_51_variance = readTrainedWeights(batch_normalization_51_variance_path.c_str(), 0,1,1280,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,5120,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 
0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_9 = tensorConvolution(var_8, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_13 = tensorConvolution(var_11, depthwise_conv2d_2_w, 1, 1, 1, 1, 1, 96); - void* var_14 = tensorBatchNorm(var_13, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_15 = tensorRelu(var_14); - void* var_16 = tensorConvolution(var_15, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_17 = tensorBatchNorm(var_16, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_18 = tensorConvolution(var_17, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_22 = tensorConvolution(var_20, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 144); - void* var_23 = tensorBatchNorm(var_22, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_24 = tensorRelu(var_23); - void* var_25 = tensorConvolution(var_24, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_26 = tensorBatchNorm(var_25, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_27 = tensorAdd(var_17, var_26); - void* var_28 = tensorConvolution(var_27, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_29 = tensorBatchNorm(var_28, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 144); - void* var_33 = tensorBatchNorm(var_32, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_34 = tensorRelu(var_33); - void* var_35 = tensorConvolution(var_34, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_36 = tensorBatchNorm(var_35, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_37 = tensorConvolution(var_36, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_38 = tensorBatchNorm(var_37, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_39 = tensorRelu(var_38); - void* var_41 = tensorConvolution(var_39, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 192); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_46 = tensorAdd(var_36, var_45); - void* var_47 = tensorConvolution(var_46, 
conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_48 = tensorBatchNorm(var_47, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_49 = tensorRelu(var_48); - void* var_51 = tensorConvolution(var_49, depthwise_conv2d_6_w, 1, 1, 1, 1, 1, 192); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_54 = tensorConvolution(var_53, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_55 = tensorBatchNorm(var_54, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_56 = tensorAdd(var_46, var_55); - void* var_57 = tensorConvolution(var_56, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_58 = tensorBatchNorm(var_57, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_61 = tensorConvolution(var_59, depthwise_conv2d_7_w, 1, 1, 2, 2, 1, 192); - void* var_62 = tensorBatchNorm(var_61, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_63 = tensorRelu(var_62); - void* var_64 = tensorConvolution(var_63, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_65 = tensorBatchNorm(var_64, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_66 = tensorConvolution(var_65, conv2d_15_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_70 = tensorConvolution(var_68, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 384); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_16_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_75 = tensorAdd(var_65, var_74); - void* var_76 = tensorConvolution(var_75, conv2d_17_w, 0, 0, 1, 1, 1, 1); - void* var_77 = tensorBatchNorm(var_76, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_78 = tensorRelu(var_77); - void* var_80 = tensorConvolution(var_78, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 384); - void* var_81 = tensorBatchNorm(var_80, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_82 = tensorRelu(var_81); - void* var_83 = tensorConvolution(var_82, conv2d_18_w, 0, 0, 1, 1, 1, 1); - void* var_84 = tensorBatchNorm(var_83, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_85 = tensorAdd(var_75, var_84); - void* var_86 = tensorConvolution(var_85, conv2d_19_w, 0, 0, 1, 1, 1, 1); - void* var_87 = tensorBatchNorm(var_86, batch_normalization_27_gamma, batch_normalization_27_beta, 
batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_88 = tensorRelu(var_87); - void* var_90 = tensorConvolution(var_88, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 384); - void* var_91 = tensorBatchNorm(var_90, batch_normalization_28_gamma, batch_normalization_28_beta, batch_normalization_28_mean, batch_normalization_28_variance, 0.001); - void* var_92 = tensorRelu(var_91); - void* var_93 = tensorConvolution(var_92, conv2d_20_w, 0, 0, 1, 1, 1, 1); - void* var_94 = tensorBatchNorm(var_93, batch_normalization_29_gamma, batch_normalization_29_beta, batch_normalization_29_mean, batch_normalization_29_variance, 0.001); - void* var_95 = tensorAdd(var_85, var_94); - void* var_97 = tensorConvolution(var_95, conv2d_21_w, 0, 0, 1, 1, 1, 1); - void* var_98 = tensorBatchNorm(var_97, batch_normalization_30_gamma, batch_normalization_30_beta, batch_normalization_30_mean, batch_normalization_30_variance, 0.001); - void* var_99 = tensorRelu(var_98); - void* var_101 = tensorConvolution(var_99, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 384); - void* var_102 = tensorBatchNorm(var_101, batch_normalization_31_gamma, batch_normalization_31_beta, batch_normalization_31_mean, batch_normalization_31_variance, 0.001); - void* var_103 = tensorRelu(var_102); - void* var_104 = tensorConvolution(var_103, conv2d_22_w, 0, 0, 1, 1, 1, 1); - void* var_105 = tensorBatchNorm(var_104, batch_normalization_32_gamma, batch_normalization_32_beta, batch_normalization_32_mean, batch_normalization_32_variance, 0.001); - void* var_106 = tensorConvolution(var_105, conv2d_23_w, 0, 0, 1, 1, 1, 1); - void* var_107 = tensorBatchNorm(var_106, batch_normalization_33_gamma, batch_normalization_33_beta, batch_normalization_33_mean, batch_normalization_33_variance, 0.001); - void* var_108 = tensorRelu(var_107); - void* var_110 = tensorConvolution(var_108, depthwise_conv2d_12_w, 1, 1, 1, 1, 1, 576); - void* var_111 = tensorBatchNorm(var_110, batch_normalization_34_gamma, batch_normalization_34_beta, batch_normalization_34_mean, batch_normalization_34_variance, 0.001); - void* var_112 = tensorRelu(var_111); - void* var_113 = tensorConvolution(var_112, conv2d_24_w, 0, 0, 1, 1, 1, 1); - void* var_114 = tensorBatchNorm(var_113, batch_normalization_35_gamma, batch_normalization_35_beta, batch_normalization_35_mean, batch_normalization_35_variance, 0.001); - void* var_115 = tensorAdd(var_105, var_114); - void* var_116 = tensorConvolution(var_115, conv2d_25_w, 0, 0, 1, 1, 1, 1); - void* var_117 = tensorBatchNorm(var_116, batch_normalization_36_gamma, batch_normalization_36_beta, batch_normalization_36_mean, batch_normalization_36_variance, 0.001); - void* var_118 = tensorRelu(var_117); - void* var_120 = tensorConvolution(var_118, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 576); - void* var_121 = tensorBatchNorm(var_120, batch_normalization_37_gamma, batch_normalization_37_beta, batch_normalization_37_mean, batch_normalization_37_variance, 0.001); - void* var_122 = tensorRelu(var_121); - void* var_123 = tensorConvolution(var_122, conv2d_26_w, 0, 0, 1, 1, 1, 1); - void* var_124 = tensorBatchNorm(var_123, batch_normalization_38_gamma, batch_normalization_38_beta, batch_normalization_38_mean, batch_normalization_38_variance, 0.001); - void* var_125 = tensorAdd(var_115, var_124); - void* var_127 = tensorConvolution(var_125, conv2d_27_w, 0, 0, 1, 1, 1, 1); - void* var_128 = tensorBatchNorm(var_127, batch_normalization_39_gamma, batch_normalization_39_beta, batch_normalization_39_mean, batch_normalization_39_variance, 0.001); - void* var_129 = 
tensorRelu(var_128); - void* var_131 = tensorConvolution(var_129, depthwise_conv2d_14_w, 1, 1, 2, 2, 1, 576); - void* var_132 = tensorBatchNorm(var_131, batch_normalization_40_gamma, batch_normalization_40_beta, batch_normalization_40_mean, batch_normalization_40_variance, 0.001); - void* var_133 = tensorRelu(var_132); - void* var_134 = tensorConvolution(var_133, conv2d_28_w, 0, 0, 1, 1, 1, 1); - void* var_135 = tensorBatchNorm(var_134, batch_normalization_41_gamma, batch_normalization_41_beta, batch_normalization_41_mean, batch_normalization_41_variance, 0.001); - void* var_136 = tensorConvolution(var_135, conv2d_29_w, 0, 0, 1, 1, 1, 1); - void* var_137 = tensorBatchNorm(var_136, batch_normalization_42_gamma, batch_normalization_42_beta, batch_normalization_42_mean, batch_normalization_42_variance, 0.001); - void* var_138 = tensorRelu(var_137); - void* var_140 = tensorConvolution(var_138, depthwise_conv2d_15_w, 1, 1, 1, 1, 1, 960); - void* var_141 = tensorBatchNorm(var_140, batch_normalization_43_gamma, batch_normalization_43_beta, batch_normalization_43_mean, batch_normalization_43_variance, 0.001); - void* var_142 = tensorRelu(var_141); - void* var_143 = tensorConvolution(var_142, conv2d_30_w, 0, 0, 1, 1, 1, 1); - void* var_144 = tensorBatchNorm(var_143, batch_normalization_44_gamma, batch_normalization_44_beta, batch_normalization_44_mean, batch_normalization_44_variance, 0.001); - void* var_145 = tensorAdd(var_135, var_144); - void* var_146 = tensorConvolution(var_145, conv2d_31_w, 0, 0, 1, 1, 1, 1); - void* var_147 = tensorBatchNorm(var_146, batch_normalization_45_gamma, batch_normalization_45_beta, batch_normalization_45_mean, batch_normalization_45_variance, 0.001); - void* var_148 = tensorRelu(var_147); - void* var_150 = tensorConvolution(var_148, depthwise_conv2d_16_w, 1, 1, 1, 1, 1, 960); - void* var_151 = tensorBatchNorm(var_150, batch_normalization_46_gamma, batch_normalization_46_beta, batch_normalization_46_mean, batch_normalization_46_variance, 0.001); - void* var_152 = tensorRelu(var_151); - void* var_153 = tensorConvolution(var_152, conv2d_32_w, 0, 0, 1, 1, 1, 1); - void* var_154 = tensorBatchNorm(var_153, batch_normalization_47_gamma, batch_normalization_47_beta, batch_normalization_47_mean, batch_normalization_47_variance, 0.001); - void* var_155 = tensorAdd(var_145, var_154); - void* var_157 = tensorConvolution(var_155, conv2d_33_w, 0, 0, 1, 1, 1, 1); - void* var_158 = tensorBatchNorm(var_157, batch_normalization_48_gamma, batch_normalization_48_beta, batch_normalization_48_mean, batch_normalization_48_variance, 0.001); - void* var_159 = tensorRelu(var_158); - void* var_161 = tensorConvolution(var_159, depthwise_conv2d_17_w, 1, 1, 1, 1, 1, 960); - void* var_162 = tensorBatchNorm(var_161, batch_normalization_49_gamma, batch_normalization_49_beta, batch_normalization_49_mean, batch_normalization_49_variance, 0.001); - void* var_163 = tensorRelu(var_162); - void* var_164 = tensorConvolution(var_163, conv2d_34_w, 0, 0, 1, 1, 1, 1); - void* var_165 = tensorBatchNorm(var_164, batch_normalization_50_gamma, batch_normalization_50_beta, batch_normalization_50_mean, batch_normalization_50_variance, 0.001); - void* var_167 = tensorConvolution(var_165, conv2d_35_w, 0, 0, 1, 1, 1, 1); - void* var_168 = tensorBatchNorm(var_167, batch_normalization_51_gamma, batch_normalization_51_beta, batch_normalization_51_mean, batch_normalization_51_variance, 0.001); - void* var_169 = tensorRelu(var_168); - void* var_170 = tensorPooling(var_169,1,2,2,0,0,2,2); - void* var_172 = 
tensorGemmGPU(var_170, dense_1_w); - void* var_173 = tensorAdd(var_172, dense_1_b); - void* var_174 = tensorSoftmax(var_173); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_174); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc deleted file mode 100644 index 50df874874592a94238e596189b6a477fb66f05f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling.cc +++ /dev/null @@ -1,166 +0,0 @@ -// Per tensor operation - -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - startMemTracking(); - - int total_runs = 10; - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - // FIRST Tensor Runtime CALL - profiler.resume_profiler(); - void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorTanh(conv1out); - - // 2nd Layer - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorTanh(conv2out); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorTanh(conv4out); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 5th Layer - void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorTanh(conv5out); - - // 6th Layer - void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv6out, conv6_bias); - void* conv6_tanh = tensorTanh(conv6out); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - final_accuracy = (final_accuracy / batch_count) / total_runs; - dumpFinalAccuracy(final_accuracy); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc deleted file mode 100644 index f95a7bda4fc581e4c40d4882304156f2420f22a5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet2_profiling_tensors.cc +++ /dev/null 
@@ -1,262 +0,0 @@ -// Per tensor operation - -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -void add_data(std::unordered_map<std::string, std::pair<double, double> >& total_time_energies, Profiler& profiler, const std::string& op_name){ - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - - auto itr = total_time_energies.find(op_name); - if (itr == total_time_energies.end()){ - total_time_energies.insert(std::make_pair(op_name, time_energy)); - } else { - itr->second.first += time_energy.first; - itr->second.second += time_energy.second; - } - profiler.reset(); -} - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - std::ofstream online_profiler_output; - online_profiler_output.open("online_output.txt"); - - startMemTracking(); - - // NOTE: CHANGED INPUT TO STANDARDIZE - int total_runs = 50; // FOR NOW 100; - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - Profiler profiler; - profiler.start_profiler(); - - // Get the total time and energy per tensor per run - std::unordered_map<std::string, std::pair<double, double> > total_time_energies; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - // FIRST Tensor Runtime CALL - profiler.resume_profiler(); - void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - add_data(total_time_energies, profiler, "Conv1"); - - profiler.resume_profiler(); - tensorAdd(conv1out, conv1_bias); - add_data(total_time_energies, profiler, "Add1"); - - profiler.resume_profiler(); - void* conv1_tanh = tensorTanh(conv1out); - add_data(total_time_energies, profiler, "Tanh1"); - - // 2nd Layer - profiler.resume_profiler(); - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - add_data(total_time_energies, profiler, "Conv2"); - - profiler.resume_profiler(); - tensorAdd(conv2out, conv2_bias); - add_data(total_time_energies, profiler, "Add2"); - - profiler.resume_profiler(); - void* conv2_tanh = tensorTanh(conv2out); - add_data(total_time_energies, profiler, "Tanh2"); - - profiler.resume_profiler(); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - add_data(total_time_energies, profiler, "Pool1"); - - // 3rd Layer - profiler.resume_profiler(); - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - add_data(total_time_energies, profiler, "Conv3"); - - profiler.resume_profiler(); - tensorAdd(conv3out, conv3_bias); - add_data(total_time_energies, profiler, "Add3"); - - profiler.resume_profiler(); - void* conv3_tanh = tensorTanh(conv3out); - add_data(total_time_energies, profiler, "Tanh3"); - - // 4th Layer - profiler.resume_profiler(); - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - add_data(total_time_energies, profiler, "Conv4"); - - profiler.resume_profiler(); - tensorAdd(conv4out, conv4_bias); - add_data(total_time_energies, profiler, "Add4"); - - profiler.resume_profiler(); - void* conv4_tanh = tensorTanh(conv4out); - add_data(total_time_energies, profiler, "Tanh4"); - - profiler.resume_profiler(); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - add_data(total_time_energies, profiler, "Pool2"); - - // 5th Layer - profiler.resume_profiler(); - void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - add_data(total_time_energies, profiler, "Conv5"); - - profiler.resume_profiler(); - tensorAdd(conv5out, conv5_bias); - add_data(total_time_energies, profiler, "Add5"); - - profiler.resume_profiler(); - void* conv5_tanh = tensorTanh(conv5out); - add_data(total_time_energies, profiler, "Tanh5"); - - // 6th Layer - profiler.resume_profiler(); - void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - 
add_data(total_time_energies, profiler, "Conv6"); - - profiler.resume_profiler(); - tensorAdd(conv6out, conv6_bias); - add_data(total_time_energies, profiler, "Add6"); - - profiler.resume_profiler(); - void* conv6_tanh = tensorTanh(conv6out); - add_data(total_time_energies, profiler, "Tanh6"); - - profiler.resume_profiler(); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - add_data(total_time_energies, profiler, "Pool3"); - - // final FC Layer - profiler.resume_profiler(); - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - add_data(total_time_energies, profiler, "Mul1"); // ASSUMING that this is mul1 - - std::cout<<"-----------------------------------ADD 7--------------------------------\n"; - profiler.resume_profiler(); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - add_data(total_time_energies, profiler, "Add7"); - std::cout<<"-----------------------------------ADD 7 ENDS --------------------------------\n"; - - profiler.resume_profiler(); - void* result = tensorSoftmax(gemm1biasout); - add_data(total_time_energies, profiler, "Softmax1"); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - stopProfiling(); - //online_profiler_output << "Total time: " << total_time << ", " << total_energy << "\n"; - // Now compute the averages across batches - std::ofstream ofs; - std::string arr[] = {"Add1", "Add2", "Add3", "Add4", "Add5", "Add6", "Add7", - "Conv1", "Conv2", "Conv3", "Conv4", "Conv5", "Conv6", - "Mul1", - "Pool1", "Pool2", "Pool3", - "Softmax1", - "Tanh1", "Tanh2", "Tanh3", "Tanh4", "Tanh5", "Tanh6"}; - ofs.open("online_profiler_tensor_data.txt"); - std::vector<std::string> ordered_keys(std::begin(arr), std::end(arr)); - for (const std::string& key : ordered_keys){ - const auto& data_pair = total_time_energies[key]; - ofs << key << ": " << data_pair.first / total_runs << "\t" << data_pair.second / total_runs << '\n'; - std::cout<< key << ": " << data_pair.first / total_runs << "\t" << data_pair.second / total_runs << '\n'; - } - - /* - ofs.open("online_profiler_tensor_data.txt"); - for (const auto& tensor_data : total_time_energies){ - ofs << tensor_data.first << ": " << tensor_data.second.first / total_runs << "\t" << tensor_data.second.second / total_runs << '\n'; - }*/ - ofs.close(); - final_accuracy = (final_accuracy / batch_count) / total_runs; - dumpFinalAccuracy(final_accuracy); - online_profiler_output.close(); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc deleted file mode 100644 index 94cef7fba14e0b9d1d9ec72b508ccd11cb560a87..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/alexnet_cifar10_profiling.cc +++ /dev/null @@ -1,127 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - 
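The deleted AlexNet CIFAR-10 profiling driver that begins here follows the same harness as the other legacy profiling sources removed in this diff: weights are loaded once, then for total_runs repetitions over every batch the forward pass is bracketed by profiler.resume_profiler()/pause_profiler(), the per-batch (time, energy) pair is accumulated, and accuracy is checked with computeAccuracy2 before freeBatchMemory releases the batch tensors. A condensed sketch of that loop, with the network-specific tensor calls elided and loop bounds taken from the surrounding source:

```cpp
// Sketch of the shared legacy profiling loop (forward pass elided).
// Identifiers (Profiler, readInputBatch, readLabelsBatch, computeAccuracy2,
// freeBatchMemory, dumpFinalAccuracy) are the ones used by the deleted sources.
Profiler profiler;
profiler.start_profiler();
double total_time = 0.0, total_energy = 0.0;
float final_accuracy = 0.0;

for (int run = 0; run < total_runs; run++) {
  for (int b = 0; b < batch_count; b++) {
    int start = b * batch_size, end = (b + 1) * batch_size;
    void *input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);

    profiler.resume_profiler();
    void *result = input; /* ... network-specific tensor ops ... */
    profiler.pause_profiler();

    auto time_energy = profiler.get_time_energy(); // (time, energy) for the bracketed region
    total_time   += time_energy.first;
    total_energy += time_energy.second;
    profiler.reset();

    uint8_t *labels = readLabelsBatch(labels_path.c_str(), start, end);
    final_accuracy += computeAccuracy2(labels, batch_size, result);
    freeBatchMemory();   // releases tensors allocated since startMemTracking()
  }
}
profiler.stop_profiler();
final_accuracy = (final_accuracy / batch_count) / total_runs;
dumpFinalAccuracy(final_accuracy);
```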
std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorTanh(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); - void* var_6 = tensorAdd(var_5, conv2d_2_b); - void* var_7 = tensorTanh(var_6); - void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); - void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorTanh(var_11); - void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_14 = tensorAdd(var_13, conv2d_4_b); - void* var_15 = tensorTanh(var_14); - void* var_16 = tensorConvolution(var_15, 
conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorTanh(var_17); - void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); - void* var_22 = tensorGemmGPU(var_19, dense_1_w); - void* var_23 = tensorAdd(var_22, dense_1_b); - void* var_24 = tensorSoftmax(var_23); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_24); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp deleted file mode 100644 index bd7b8e86644ae04d684bb1d777fdb914f943d62f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/blend_profiling.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <algorithm> -#include <cassert> -#include <fstream> -#include <string> - -const size_t n_channels = 3; - -Tensor *gaussianFilter_(float div) { - std::vector<float> gauss_data = {1, 4, 6, 4, 1, 4, 16, 24, 16, - 4, 6, 24, 36, 24, 6, 4, 16, 24, - 16, 4, 1, 4, 6, 4, 1}; - for (float &f : gauss_data) - f /= div; - return (Tensor *)createFilterFromData( - CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1); -} - -Tensor *gaussianFilter() { return gaussianFilter_(16.0); } - -void *normalize(void *image) { - auto *max_1D = tensorReduce(image, 2, MathOp::Max); - auto *max = tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = tensorMap2(MathOp::Div, image, max); - return img_norm; -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -void *sharpen(void *image) { - void *gaussian = gaussianFilter(); - forward_reshape(image); - void *blurred = - tensorConvApprox(image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); - backward_reshape(blurred); - backward_reshape(image); - void *blurred_norm = normalize(blurred); - void *image_norm = normalize(image); - void *ret = tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm); - return ret; -} - -void *main_procedure(void *fg, void *bg) { - void *g_bg = sharpen(bg); - void *g_fg = sharpen(fg); - void *ret = tensorMap2(MathOp::Blend2, g_bg, g_fg); - return ret; -} - -extern void llvm_hpvm_initTensorRt(int gpuid); - -extern void llvm_hpvm_clearRuntimeController(); - -const size_t batch_size = 500; - -int main() { - const char *input1_path = 
"../model_params/image_processing_5k"; - const char *input2_path = "../model_params/image_processing_5k_shuffled"; - - llvm_hpvm_initTensorRt(0); - startMemTracking(); - size_t bstart = 0; - startProfiling(); - while (true) { - auto *background = readDataSet(input1_path, bstart, batch_size, n_channels), - *foreground = readDataSet(input2_path, bstart, batch_size, n_channels); - if (!background || !foreground) - break; - - auto *result = main_procedure(foreground, background); - bstart += batch_size; - freeBatchMemory(); - } - stopProfiling(); - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp deleted file mode 100644 index 233f317e611e79f66038795b44b295fd3683ae4a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/canny_profiling.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <vector> - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -std::pair<Tensor *, Tensor *> getSobelKernels() { - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - return std::make_pair(t1, t2); -} - -void *main_procedure(void *dataset) { - Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); - Tensor *kernel_x, *kernel_y; - std::tie(kernel_x, kernel_y) = getSobelKernels(); - - // 0. Grayscale - auto *summed_image = tensorReduce(dataset, 1, MathOp::Add, 0.0f); - auto *grayscale_image = tensorMap1(MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = - tensorConvApprox(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1); - // 2. Get edge gradient / direction - auto *grad_x = - tensorConvApprox(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - auto *grad_y = - tensorConvApprox(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y); - // 2.5. 
Normalize grad magnitude - auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max, 0.0f); - auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max, 0.0f); - auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max); - return grad_mag_norm; -} - -extern void llvm_hpvm_initTensorRt(int gpuid); - -extern void llvm_hpvm_clearRuntimeController(); - -const size_t batch_size = 500; - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k"; - - llvm_hpvm_initTensorRt(0); - size_t bstart = 0; - startMemTracking(); - startProfiling(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - - auto *result = main_procedure(batch); - bstart += batch_size; - freeBatchMemory(); - } - stopProfiling(); - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp deleted file mode 100644 index 4c13e1587aa95a2eb4d1e645fa770f4b09d71dac..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/fft_profiling.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <cmath> -#include <iostream> -#include <thrust/complex.h> -#include <vector> - -const size_t batch_size = 250, total_max = 3000; -const size_t n_colors = N_RGB_CHAN; -const float psnr_threshold = 25.0; - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -static void *normalize(void *image) { - auto *max_1D = tensorReduce(image, 2, MathOp::Max); - auto *max = tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = tensorMap2(MathOp::Div, image, max); - freeTensor(max_1D); - freeTensor(max); - return img_norm; -} - -void *main_procedure(void *dataset, void *gaussian) { - forward_reshape(dataset); - auto *sharpened = tensorConvApprox(dataset, gaussian, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - backward_reshape(sharpened); - return normalize(sharpened); -} - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k_128_128"; - llvm_hpvm_initTensorRt(0); - Tensor *gaussian = gaussianFilter(1, 3, 3, 1); - size_t bstart = 0; - startMemTracking(); - startProfiling(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - auto *result = main_procedure(batch, gaussian); - bstart += batch_size; - if 
(bstart >= total_max) - break; - freeBatchMemory(); - } - stopProfiling(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc deleted file mode 100644 index f55d883c2c11f1f0721b64d4ab00de373f685a3e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/lenet_keras_profiling.cc +++ /dev/null @@ -1,186 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 10; - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels match the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_keras/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_keras/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_keras/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_keras/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_keras/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_keras/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_keras/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_keras/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - float final_accuracy = 0.0; - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - profiler.resume_profiler(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - - void* conv1_tanh = tensorTanh(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - - void* conv2_tanh = tensorTanh(pool2out); - - void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorTanh(gemm1biasout); - - void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - - profiler.reset(); - - // End profiling and dump output to profile.txt - stopProfiling(); - - final_accuracy += computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / total_runs; - dumpFinalAccuracy(final_accuracy); -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc deleted file mode 100644 index e84ca25f6fc3b44b02ce5b45f4517ba6ad6bc3be..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_cifar10_profiling.cc +++ /dev/null @@ -1,438 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet_quant/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string 
batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* 
batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string 
batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - 
void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = 
readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - 
std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string 
batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + 
std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* 
batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - startProfiling(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, 
batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_55 = tensorConvolution(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorRelu(var_56); - void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorRelu(var_59); - void* var_63 = tensorConvolution(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_70 = 
tensorConvolution(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_75 = tensorRelu(var_74); - void* var_77 = tensorConvolution(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorRelu(var_78); - void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorRelu(var_81); - void* var_85 = tensorConvolution(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorRelu(var_86); - void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_92 = tensorConvolution(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorRelu(var_93); - void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorRelu(var_96); - void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorGemmGPU(var_99, dense_1_w); - void* var_102 = tensorAdd(var_101, dense_1_b); - void* var_103 = tensorSoftmax(var_102); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_103); - final_accuracy += accuracy; - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc deleted file mode 100644 index 
3dcce8ada9c74a439440594eb7df8a33c169cf6c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_depthwise_profiling.cc +++ /dev/null @@ -1,415 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string 
batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - 
void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string 
batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + 
std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); 
- void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = 
readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string 
batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string 
batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - - int total_runs = 10; - float final_accuracy = 0.0; - - for (int run_num = 0; run_num < total_runs; run_num++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 
0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_55 = tensorConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorRelu(var_56); - void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* 
var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorRelu(var_59); - void* var_63 = tensorConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_70 = tensorConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_75 = tensorRelu(var_74); - void* var_77 = tensorConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorRelu(var_78); - void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorRelu(var_81); - void* var_85 = tensorConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorRelu(var_86); - void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_92 = tensorConvCutlass(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorRelu(var_93); - void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorRelu(var_96); - void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorGemmGPU(var_99, dense_1_w); - void* var_102 = tensorAdd(var_101, dense_1_b); - void* var_103 = tensorSoftmax(var_102); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_103); - final_accuracy += accuracy; - freeBatchMemory(); - } - } - final_accuracy = final_accuracy / batch_count; - 
dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc deleted file mode 100644 index 5af17774b41d4d265e110dd988e3458442312226..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_depthwise_profiling.cc +++ /dev/null @@ -1,247 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(int argc, char* argv[]){ - - int total_runs = 10; - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(0); - - //std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/"); - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - 
void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string 
batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = 
readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = 
readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 500; - int batch_count = test_input_size / batch_size; - - - float final_accuracy = 0.0; - - for(int j = 0; j < total_runs; j++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = 
tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorGemmGPU(var_47, dense_1_w); - void* var_50 = tensorAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - //final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy / batch_count); - } - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - //dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc deleted file mode 100644 index c60f15e3cb71d9fa81b444be8348803e1d7891d6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/mobilenet_shallow_profiling.cc +++ /dev/null @@ -1,227 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = 
std::string("../model_params/mobilenet_shallow/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 
0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* 
batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 
0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = 
tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_40 = tensorPooling(var_38,1,2,2,0,0,2,2); - void* var_42 = tensorGemmGPU(var_40, dense_1_w); - void* var_43 = tensorAdd(var_42, dense_1_b); - void* var_44 = tensorSoftmax(var_43); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_44); - final_accuracy += accuracy; - freeBatchMemory(); - } - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc deleted file mode 100644 index 1b0664200170235e2d0dac5682108de97b094776..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/resnet18_cifar10_profiling.cc +++ /dev/null @@ -1,245 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - 
std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w 
= readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string 
dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - void* var_6 = tensorConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_7 = tensorAdd(var_6, conv2d_2_b); - void* var_8 = tensorRelu(var_7); - void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorAdd(var_4, var_11); - void* var_13 = tensorRelu(var_12); - void* var_15 = tensorConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorRelu(var_16); - void* var_19 = tensorConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_20 = tensorAdd(var_19, conv2d_5_b); - void* var_21 = tensorAdd(var_13, var_20); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_6_b); - void* var_26 = tensorRelu(var_25); - void* var_28 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_7_b); - void* var_30 = tensorAdd(var_22, var_29); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); - void* var_34 = tensorAdd(var_33, conv2d_8_b); - void* var_35 = tensorRelu(var_34); - void* var_37 = tensorConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_38 = tensorAdd(var_37, conv2d_9_b); - void* var_40 = tensorConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorAdd(var_41, var_38); - void* var_43 = tensorRelu(var_42); - void* var_45 = tensorConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_46 = tensorAdd(var_45, conv2d_11_b); - void* var_47 = tensorRelu(var_46); - void* var_49 = tensorConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorAdd(var_43, var_50); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_55 = tensorAdd(var_54, conv2d_13_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); - void* var_59 = tensorAdd(var_58, conv2d_14_b); - void* var_60 = tensorAdd(var_52, var_59); - void* var_61 = tensorRelu(var_60); - void* var_63 = tensorConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); - void* var_64 = tensorAdd(var_63, conv2d_15_b); - void* var_65 = tensorRelu(var_64); - void* var_67 = tensorConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); - void* var_68 = tensorAdd(var_67, conv2d_16_b); - void* var_70 = tensorConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); 
- void* var_71 = tensorAdd(var_70, conv2d_17_b); - void* var_72 = tensorAdd(var_71, var_68); - void* var_73 = tensorRelu(var_72); - void* var_75 = tensorConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); - void* var_76 = tensorAdd(var_75, conv2d_18_b); - void* var_77 = tensorRelu(var_76); - void* var_79 = tensorConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); - void* var_80 = tensorAdd(var_79, conv2d_19_b); - void* var_81 = tensorAdd(var_73, var_80); - void* var_82 = tensorRelu(var_81); - void* var_84 = tensorConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); - void* var_85 = tensorAdd(var_84, conv2d_20_b); - void* var_86 = tensorRelu(var_85); - void* var_88 = tensorConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); - void* var_89 = tensorAdd(var_88, conv2d_21_b); - void* var_90 = tensorAdd(var_82, var_89); - void* var_91 = tensorRelu(var_90); - void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmGPU(var_92, dense_1_w); - void* var_95 = tensorAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc deleted file mode 100644 index 70246dac4b3d43550f49a0a653d1c13396f3a84a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar100_profiling.cc +++ /dev/null @@ -1,184 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string 
conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = 
readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - int total_runs = 10; - Profiler profiler; - profiler.start_profiler(); - double total_time = 0.0; - double total_energy = 0.0; - - for (int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = 
tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - } - - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc deleted file mode 100644 index 7ed583884a3fa2fca745bde4d27f8ca92cfcda02..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/profiling/vgg16_cifar10_profiling.cc +++ /dev/null @@ -1,185 +0,0 @@ -#include "/home/nvidia/Gitlab/hpvm/llvm/projects/gpu_profiler/include/profiler.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + 
std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - Profiler profiler; - profiler.start_profiler(); - - double total_time = 0.0; - double total_energy = 0.0; - - int total_runs = 10; - for(int i = 0; i < total_runs; i++){ - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = 
readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - profiler.resume_profiler(); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - profiler.pause_profiler(); - auto time_energy = profiler.get_time_energy(); - total_time += time_energy.first; - total_energy += time_energy.second; - profiler.reset(); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_60); - final_accuracy += accuracy; - - freeBatchMemory(); - } - } - profiler.stop_profiler(); - - std::cout<<"---------------------------------------\n"; - std::cout<<"Average time: " << total_time / total_runs << '\n'; - std::cout<<"Average energy: " << total_energy / total_runs << '\n'; - std::cout<<"---------------------------------------\n"; - - final_accuracy = final_accuracy / batch_count / total_runs; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc deleted file mode 100644 index fbc9d038505313adefdf9100a1e55e3a98d823f8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_cifar10_promise.cc +++ /dev/null @@ -1,163 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/alexnet2_cifar10/test_labels.bin", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readTrainedWeights("../model_params/alexnet2_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - - //-1.881, 2.09 - //-0.18,0.174 - void* conv1_out = ConvLayer_PROMISE(input, -1.881, 2.09, conv1_filter, -0.542,0.371, conv1_bias, -0.066,0.04, - 1, 1, 1, 1, 0, 0, 0, -1,1, 9); - - void* 
conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -0.424,0.314, conv2_bias, -0.355,-0.172, - 1, 1, 1, 1, 0, 2, 0, -1,1, 9); - - void* conv3_out = ConvLayer_PROMISE(conv2_out, -1,1, conv3_filter, -0.441,0.795, conv3_bias, -0.804,0.753, - 1, 1, 1, 1, 0, 0, 0, -1,1, 9); - - void* conv4_out = ConvLayer_PROMISE(conv3_out, -1,1, conv4_filter, -0.288,0.31, conv4_bias, -0.635,0.29, - 1, 1, 1, 1, 0, 2, 0, -1,1, 9); - - void* conv5_out = ConvLayer_PROMISE(conv4_out, -1,1, conv5_filter, -0.279,0.376, conv5_bias, -1.13, 1.239, - 1, 1, 1, 1, 0, 0, 0, -1,1, 9); - - void* conv6_out = ConvLayer_PROMISE(conv5_out, -1,1, conv6_filter, -0.27,0.279, conv6_bias, -0.503,0.127, - 1, 1, 1, 1, 0, 2, 0, -1,1, 9); - - // No Activation - void* fc1_out = FCLayer_PROMISE(conv6_out, -1,1, fc1_weights, -0.242,0.584, fc1_bias, -0.537,0.558, -1, -1,1, 9); - - void* result = tensorSoftmax(fc1_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc deleted file mode 100644 index 3c3bc018518cf6ab3ff7dd7a608900308efa1e49..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_promise_quant.cc +++ /dev/null @@ -1,93 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int offset = 5000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10_test/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); 
- std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, -0.7750273948907852, 0.7799443006515503, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.7750273948907852, 0.7799443006515503, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.8086670643091202, 0.98395329773426, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.8086670643091202, 0.98395329773426, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9956784248352051, 0.9985664486885071, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9956784248352051, 0.9985664486885071, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9932191967964172, 0.9923790097236633, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9932191967964172, 0.9923790097236633, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -0.9999013543128967, 0.9999108910560608, 9); - void* var_5 = ConvLayer_PROMISE(var_4, -0.9999013543128967, 0.9999108910560608, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.991036117374897, 0.9714049702882765, 9); - void* var_6 = FCLayer_PROMISE(var_5, -0.991036117374897, 0.9714049702882765, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -119.27973731994629, -25.226281957626327, 9); - void* var_7 = tensorSoftmax(var_6); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_7); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc deleted file mode 100644 index 1799480796fb988d0e9624fb482339d2345d2728..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet2_valid.cc +++ /dev/null @@ -1,92 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10_test/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,128,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816435, 2.0934134, conv2d_1_w, -0.5421946, 0.3710851, conv2d_1_b, -0.06697306, 0.040868897, 1, 1, 1, 1, -1, 0, 0, 
-0.7750273948907852, 0.7799443006515503, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.7750273948907852, 0.7799443006515503, conv2d_2_w, -0.42474225, 0.31460348, conv2d_2_b, -0.3557253, -0.17281663, 1, 1, 1, 1, 0, 2, 0, -0.8086670643091202, 0.98395329773426, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.8086670643091202, 0.98395329773426, conv2d_3_w, -0.44134507, 0.79587924, conv2d_3_b, -0.80424446, 0.75330096, 1, 1, 1, 1, -1, 0, 0, -0.9956784248352051, 0.9985664486885071, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9956784248352051, 0.9985664486885071, conv2d_4_w, -0.2883836, 0.31025785, conv2d_4_b, -0.6353164, 0.29015934, 1, 1, 1, 1, 0, 2, 0, -0.9932191967964172, 0.9923790097236633, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9932191967964172, 0.9923790097236633, conv2d_5_w, -0.2792431, 0.37689754, conv2d_5_b, -1.1379756, 1.2391574, 1, 1, 1, 1, -1, 0, 0, -0.9999013543128967, 0.9999108910560608, 9); - void* var_5 = ConvLayer_PROMISE(var_4, -0.9999013543128967, 0.9999108910560608, conv2d_6_w, -0.27078503, 0.27942517, conv2d_6_b, -0.503003, 0.12762362, 1, 1, 1, 1, 0, 2, 0, -0.991036117374897, 0.9714049702882765, 9); - void* var_6 = FCLayer_PROMISE(var_5, -0.991036117374897, 0.9714049702882765, dense_1_w, -0.24273404, 0.5845544, dense_1_b, -0.53745, 0.558251, -1, -119.27973731994629, -25.226281957626327, 9); - void* var_7 = tensorSoftmax(var_6); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_7); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc deleted file mode 100644 index 3e39f5cf03ce25511429d84ada9812fef0998194..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_cifar10_promise.cc +++ /dev/null @@ -1,158 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 100000; - } - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - for(int i = 0; i < total_runs; i++){ - - void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = 
readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performance profiling - startProfiling(); - - //-1.881, 2.09 - //-0.18,0.174 - void* conv1_out = ConvLayer_PROMISE(input, -1.881, 2.09, conv1_filter, -0.345,0.331, conv1_bias, -0.76,0.59, - 5, 5, 1, 1, 0, 2, 0, -1,1, 9); - - void* conv2_out = ConvLayer_PROMISE(conv1_out, -1,1, conv2_filter, -0.22,0.264, conv2_bias, -0.448,0.343, - 2, 2, 1, 1, 0, 2, 0, -1,1, 9); - - void* conv3_out = ConvLayer_PROMISE(conv2_out, -1,1, conv3_filter, -0.192,0.187, conv3_bias, -0.91,0.67, - 1, 1, 1, 1, 0, 0, 0, -1,1, 9); - - void* conv4_out = ConvLayer_PROMISE(conv3_out, -1,1, conv4_filter, -0.131,0.141, conv4_bias, -0.416,0.342, - 1, 1, 1, 1, 0, 0, 0, -1,1, 9); - - void* conv5_out = ConvLayer_PROMISE(conv4_out, -1,1, conv5_filter, -0.165,0.188, conv5_bias, -0.283,0.062, - 1, 1, 1, 1, 0, 2, 0, -1,1, 9); - - // No Activation - void* fc1_out = FCLayer_PROMISE(conv5_out, -1,1, fc1_weights, -0.181,0.233, fc1_bias, -0.063,0.137, -1, -1,1, 9); - - void* result = tensorSoftmax(fc1_out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc deleted file mode 100644 index 6b2b0e80ba92fa449cdd06036946101df76317e7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_promise_quant.cc +++ /dev/null @@ -1,90 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> 
-#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int offset = 5000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 
0.9999993443489075, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); - void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); - void* var_6 = tensorSoftmax(var_5); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_6); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - //dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc deleted file mode 100644 index 7a0a40adb30367866635993de3de94ca1413938e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/alexnet_valid.cc +++ /dev/null @@ -1,90 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 20; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int offset = 0; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = 
readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816426241908337, 2.0934095498544254, conv2d_1_w, -0.33087718, 0.3323643, conv2d_1_b, -0.7782218, 0.6020472, 5, 5, 1, 1, 0, 2, 0, -0.978641152381897, 0.9989452958106995, 9); - void* var_1 = ConvLayer_PROMISE(var_0, -0.978641152381897, 0.9989452958106995, conv2d_2_w, -0.2095158, 0.33543423, conv2d_2_b, -0.45020863, 0.30596754, 2, 2, 1, 1, 0, 2, 0, -0.9997039437294006, 0.999930202960968, 9); - void* var_2 = ConvLayer_PROMISE(var_1, -0.9997039437294006, 0.999930202960968, conv2d_3_w, -0.1715614, 0.17037082, conv2d_3_b, -0.6519161, 0.5939945, 1, 1, 1, 1, -1, 0, 0, -0.9999336004257202, 0.999940037727356, 9); - void* var_3 = ConvLayer_PROMISE(var_2, -0.9999336004257202, 0.999940037727356, conv2d_4_w, -0.15575546, 0.14456555, conv2d_4_b, -0.55873865, 0.4704539, 1, 1, 1, 1, -1, 0, 0, -0.9999991059303284, 0.9999993443489075, 9); - void* var_4 = ConvLayer_PROMISE(var_3, -0.9999991059303284, 0.9999993443489075, conv2d_5_w, -0.16108225, 0.16864482, conv2d_5_b, -0.22135437, 0.10401678, 1, 1, 1, 1, 0, 2, 0, -0.9994344115257263, 0.9996342062950134, 9); - void* var_5 = FCLayer_PROMISE(var_4, -0.9994344115257263, 0.9996342062950134, dense_1_w, -0.18183032, 0.19018902, dense_1_b, -0.07189204, 0.106005594, -1, -15.076565380096437, 19.422585220336913, 9); - void* var_6 = tensorSoftmax(var_5); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_6); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp deleted file mode 100644 index 377082d74e85d8394e097d9281003d482131bae5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_25.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <algorithm> -#include <cassert> -#include <fstream> -#include <string> - -const size_t n_channels = 3; - -Tensor *gaussianFilter_(float div) { - std::vector<float> gauss_data = {1, 4, 6, 4, 1, 4, 16, 24, 16, - 4, 6, 24, 36, 24, 6, 4, 16, 24, - 16, 4, 1, 4, 6, 4, 1}; - for (float &f : gauss_data) - f /= div; - return (Tensor *)createFilterFromData( - CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1); -} - -Tensor *gaussianFilter() { return gaussianFilter_(16.0); } - -void *normalize(void *image) { - auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max); - auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max); - return img_norm; -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - 
std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -void *sharpen(void *image) { - void *gaussian = gaussianFilter(); - forward_reshape(image); - void *blurred = ConvLayer_PROMISE( - image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - backward_reshape(blurred); - backward_reshape(image); - void *blurred_norm = normalize(blurred); - void *image_norm = normalize(image); - void *ret = - autotuner_tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm); - return ret; -} - -void *main_procedure(void *fg, void *bg) { - void *g_bg = sharpen(bg); - void *g_fg = sharpen(fg); - void *ret = autotuner_tensorMap2(MathOp::Blend2, g_bg, g_fg); - return ret; -} - -const size_t batch_size = 500, total_max = 3000; -const float psnr_threshold = 25.0; - -int main() { - const char *input1_path = "../model_params/image_processing_5k"; - const char *input2_path = "../model_params/image_processing_5k_shuffled"; - const char *ref_output_path = "../model_params/blending_ref_output"; - std::vector<float> psnr; - - llvm_hpvm_initTensorRt(0); - startMemTracking(); - size_t bstart = 0; - while (true) { - auto *background = readDataSet(input1_path, bstart, batch_size, n_channels), - *foreground = readDataSet(input2_path, bstart, batch_size, n_channels); - if (!background || !foreground) - break; - - auto *result = main_procedure(foreground, background); - - auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, n_channels); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp deleted file mode 100644 index 5988eb11fd63b34e045445f9fab98c4e934e646d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/blend_30.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <algorithm> -#include <cassert> -#include <fstream> -#include <string> - -const size_t n_channels = 3; - -Tensor *gaussianFilter_(float div) { - std::vector<float> gauss_data = {1, 4, 6, 4, 1, 4, 16, 24, 16, - 4, 6, 24, 36, 24, 6, 4, 16, 24, - 16, 4, 1, 4, 6, 4, 1}; - for (float &f : gauss_data) - f /= div; - return (Tensor *)createFilterFromData( - CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1); -} - -Tensor *gaussianFilter() { return gaussianFilter_(16.0); } - -void *normalize(void *image) { - auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max); - auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max); - return img_norm; -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = 
sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -void *sharpen(void *image) { - void *gaussian = gaussianFilter(); - forward_reshape(image); - void *blurred = ConvLayer_PROMISE( - image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - backward_reshape(blurred); - backward_reshape(image); - void *blurred_norm = normalize(blurred); - void *image_norm = normalize(image); - void *ret = - autotuner_tensorMap2(MathOp::AddWeighted, blurred_norm, image_norm); - return ret; -} - -void *main_procedure(void *fg, void *bg) { - void *g_bg = sharpen(bg); - void *g_fg = sharpen(fg); - void *ret = autotuner_tensorMap2(MathOp::Blend2, g_bg, g_fg); - return ret; -} - -const size_t batch_size = 500, total_max = 3000; -const float psnr_threshold = 30.0; - -int main() { - const char *input1_path = "../model_params/image_processing_5k"; - const char *input2_path = "../model_params/image_processing_5k_shuffled"; - const char *ref_output_path = "../model_params/blending_ref_output"; - std::vector<float> psnr; - - llvm_hpvm_initTensorRt(1); - startMemTracking(); - size_t bstart = 0; - while (true) { - auto *background = readDataSet(input1_path, bstart, batch_size, n_channels), - *foreground = readDataSet(input2_path, bstart, batch_size, n_channels); - if (!background || !foreground) - break; - - auto *result = main_procedure(foreground, background); - - auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, n_channels); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp deleted file mode 100644 index 4024e542f5338df556b7ea51a171fb0ce04afce8..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_25.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <fstream> - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -std::pair<Tensor *, Tensor *> getSobelKernels() { - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - return std::make_pair(t1, t2); -} - -void *main_procedure(void 
*dataset) { - Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); - Tensor *kernel_x, *kernel_y; - std::tie(kernel_x, kernel_y) = getSobelKernels(); - - // 0. Grayscale - auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add); - auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = ConvLayer_PROMISE( - grayscale_image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, - 1, 0, 0, -1, 0.0, 0.0, 0); - // 2. Get edge gradient / direction - auto *grad_x = ConvLayer_PROMISE( - image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - auto *grad_y = ConvLayer_PROMISE( - image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y); - // 2.5. Normalize grad magnitude - auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max); - auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max); - auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max); - return grad_mag_norm; -} - -const size_t batch_size = 500, total_max = 3000; -const float psnr_threshold = 25.0; - -int main() { - const char *input_path = "../model_params/image_processing_5k"; - const char *ref_output_path = "../model_params/canny_ref_output"; - std::vector<float> psnr; - llvm_hpvm_initTensorRt(0); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input_path, bstart, batch_size); - if (batch == nullptr) - break; - - auto *result = main_procedure(batch); - auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp deleted file mode 100644 index b3737ff204cf2cb1eb9ea7849f6b745cf4139dec..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/canny_30.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <fstream> - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -std::pair<Tensor *, Tensor *> getSobelKernels() { - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - return std::make_pair(t1, t2); -} - -void 
*main_procedure(void *dataset) { - Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); - Tensor *kernel_x, *kernel_y; - std::tie(kernel_x, kernel_y) = getSobelKernels(); - - // 0. Grayscale - auto *summed_image = autotuner_tensorReduce(dataset, 1, MathOp::Add); - auto *grayscale_image = autotuner_tensorMap1(MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = ConvLayer_PROMISE( - grayscale_image, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 2, 2, 1, - 1, 0, 0, -1, 0.0, 0.0, 0); - // 2. Get edge gradient / direction - auto *grad_x = ConvLayer_PROMISE( - image2, 0.0, 0.0, kernel_x, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - auto *grad_y = ConvLayer_PROMISE( - image2, 0.0, 0.0, kernel_y, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, 0, - -1, 0.0, 0.0, 0); - auto *grad_mag = autotuner_tensorMap2(MathOp::Hypot, grad_x, grad_y); - // 2.5. Normalize grad magnitude - auto *grad_max_1D = autotuner_tensorReduce(grad_mag, 2, MathOp::Max); - auto *grad_max = autotuner_tensorReduce(grad_max_1D, 3, MathOp::Max); - auto *grad_mag_norm = autotuner_tensorMap2(MathOp::Div, grad_mag, grad_max); - return grad_mag_norm; -} - -const size_t batch_size = 500, total_max = 3000; -const float psnr_threshold = 30.0; - -int main() { - const char *input_path = "../model_params/image_processing_5k"; - const char *ref_output_path = "../model_params/canny_ref_output"; - std::vector<float> psnr; - llvm_hpvm_initTensorRt(1); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input_path, bstart, batch_size); - if (batch == nullptr) - break; - - auto *result = main_procedure(batch); - auto *ref_output = readDataSet(ref_output_path, bstart, batch_size, 1); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp deleted file mode 100644 index 7c4cc55f02b73dee6214ae0ebd14e921334b1f18..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_25.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <cmath> -#include <iostream> -#include <thrust/complex.h> -#include <vector> - -const size_t batch_size = 250, total_max = 3000; -const size_t n_colors = N_RGB_CHAN; -const float psnr_threshold = 25.0; - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 
3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -static void *normalize(void *image) { - auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max); - auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max); - freeTensor(max_1D); - freeTensor(max); - return img_norm; -} - -void *main_procedure(void *dataset, void *gaussian) { - forward_reshape(dataset); - auto *sharpened = ConvLayer_PROMISE( - dataset, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, - 0, -1, 0.0, 0.0, 0); - backward_reshape(sharpened); - return normalize(sharpened); -} - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k_128_128"; - // const char *output_path = "output/"; - const char *ref_path = "../model_params/fft_ref_output"; - - std::vector<float> psnr; - llvm_hpvm_initTensorRt(0); - Tensor *gaussian = gaussianFilter(1, 3, 3, 1); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - - auto *result = main_procedure(batch, gaussian); - - // saveDataSet(output_path, (Tensor *)result, bstart); - auto *ref_output = readDataSet(ref_path, bstart, batch_size, n_colors); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp deleted file mode 100644 index 3a7b413faa1795b1d14435875dff62ae08cb974b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/fft_30.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include <cmath> -#include <iostream> -#include <thrust/complex.h> -#include <vector> - -const size_t batch_size = 250, total_max = 3000; -const size_t n_colors = N_RGB_CHAN; -const float psnr_threshold = 30.0; - -Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); -} - -extern std::vector<size_t> sizes(Tensor *t); - -void forward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[1] == 3); - sz[0] = sz[0] * sz[1]; - sz[1] = 1; - reshape(tensor, sz); -} - -void backward_reshape(void *t) { - auto *tensor = (Tensor *)t; - std::vector<size_t> sz = sizes(tensor); - assert(sz[0] % 3 == 0); - 
sz[0] = sz[0] / 3; - sz[1] = 3; - reshape(tensor, sz); -} - -static void *normalize(void *image) { - auto *max_1D = autotuner_tensorReduce(image, 2, MathOp::Max); - auto *max = autotuner_tensorReduce(max_1D, 3, MathOp::Max); - auto *img_norm = autotuner_tensorMap2(MathOp::Div, image, max); - freeTensor(max_1D); - freeTensor(max); - return img_norm; -} - -void *main_procedure(void *dataset, void *gaussian) { - forward_reshape(dataset); - auto *sharpened = ConvLayer_PROMISE( - dataset, 0.0, 0.0, gaussian, 0.0, 0.0, nullptr, 0.0, 0.0, 1, 1, 1, 1, 0, - 0, -1, 0.0, 0.0, 0); - backward_reshape(sharpened); - return normalize(sharpened); -} - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k_128_128"; - // const char *output_path = "output/"; - const char *ref_path = "../model_params/fft_ref_output"; - - std::vector<float> psnr; - llvm_hpvm_initTensorRt(1); - Tensor *gaussian = gaussianFilter(1, 3, 3, 1); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - - auto *result = main_procedure(batch, gaussian); - - // saveDataSet(output_path, (Tensor *)result, bstart); - auto *ref_output = readDataSet(ref_path, bstart, batch_size, n_colors); - std::vector<float> psnr_batch = PSNR(ref_output, result); - std::copy(psnr_batch.begin(), psnr_batch.end(), std::back_inserter(psnr)); - - bstart += batch_size; - if (bstart >= total_max) - break; - freeBatchMemory(); - } - float violation = violationRate(psnr, psnr_threshold); - float mean_psnr = mean(psnr); - std::ofstream of("final_accuracy"); - of << violation * 100 << ", " << mean_psnr << '\n'; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc deleted file mode 100644 index 5c7699026fe6e0860718e5986f4fec990ab08c6c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/lenet_promise_relu.cc +++ /dev/null @@ -1,65 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/lenet_relu/"); - std::string input_path = dir_prefix + std::string("input.bin"); - void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); - std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - 
std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - void* var_0 = ConvLayer_PROMISE(input, 0.0, 1.0, conv2d_1_w, -0.2722561, 0.25817025, - conv2d_1_b, -0.041063767, 0.031912163, - 2, 2, 1, 1, 0, 2, 1, 0.0, 1.5512946, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.5512946, conv2d_2_w, -0.17580177, 0.16332611, - conv2d_2_b, -0.041385915, 0.05869476, - 2, 2, 1, 1, -1, 0, 1, 0.0, 4.916329, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.916329, conv2d_3_w, -0.20324017, 0.18275258, - conv2d_3_b, -0.039915435, 0.04589232, - 1, 1, 2, 2, -1, 0, 1, 0.0, 9.447418, 9); - void* var_3 = FCLayer_PROMISE(var_2, 0.0, 9.447418, dense_1_w, -0.10757191, 0.123126, - dense_1_b, -0.025070198, 0.027000334, 1, 0.0, 9.926857, 9); - void* var_4 = FCLayer_PROMISE(var_3, 0.0, 9.926857, dense_2_w, -0.18867673, 0.16425411, - dense_2_b, -0.012622595, 0.04586973, 1, 0.0, 42.018578, 9); - void* var_5 = tensorSoftmax(var_4); - - computeAccuracy2(labels,10000,var_5); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc deleted file mode 100644 index 3cb28def9b48bf29f3cffd5611991b0fbaeb4c55..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_quant.cc +++ /dev/null @@ -1,419 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(1); - -int total_runs = 1; -for (int i = 0 ; i < total_runs; i++){ - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 2000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - - -std::string dir_prefix = std::string("../../keras/data/mobilenet_quant/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); -std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); -void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); -void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_mean_path = dir_prefix + 
std::string("batch_normalization_1_mean.bin"); -void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); -void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); -std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); -void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); -std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); -void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); -void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); -void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); -std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); -void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); -std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); -void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); -void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); -void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); -void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); -std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); -void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); -std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); -void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); -void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); -void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); -std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); -void* batch_normalization_4_variance = 
readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); -std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); -void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); -void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); -void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); -void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); -void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); -void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); -void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); -void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); -void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); -std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); -void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); -void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); -void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); -void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); -std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); -void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); -std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); -void* 
batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); -void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); -void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); -std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); -void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); -std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); -void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); -void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); -void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); -void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); -void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); -void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); -void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); -void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); -void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); -std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); -void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); -void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); -std::string 
batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); -void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); -void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); -std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); -void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); -std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); -void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); -void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); -void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); -std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); -void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); -std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); -void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); -void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); -void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); -void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); -void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); -void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); -void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); -void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_14_variance_path = dir_prefix + 
std::string("batch_normalization_14_variance.bin"); -void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); -void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); -void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); -void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); -void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); -void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); -void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); -void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); -void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); -void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); -void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); -void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); -void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); -void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); -void* depthwise_conv2d_9_w = 
readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); -void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); -void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); -void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); -void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); -void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); -void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); -void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); -void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); -void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); -void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); -void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); -void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); -void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); -void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); -std::string 
batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); -void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); -void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); -void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); -void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); -void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); -void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); -void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); -void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); -std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); -void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); -void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); -void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); -void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); -std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); -void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); -std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); -void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); -void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_mean_path = dir_prefix + 
std::string("batch_normalization_24_mean.bin"); -void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); -std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); -void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); -std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); -void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); -void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); -void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); -void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); -std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); -void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); -std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); -void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); -void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); -void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); -void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); -std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); -void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); -void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); -void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); -std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); -void* 
batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); -void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); -void* var_2 = tensorRelu(var_1); -void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); -void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); -void* var_5 = tensorRelu(var_4); -void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); -void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); -void* var_8 = tensorRelu(var_7); -void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); -void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); -void* var_11 = tensorRelu(var_10); -void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); -void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); -void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); -void* var_17 = tensorRelu(var_16); -void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); -void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); -void* var_20 = tensorRelu(var_19); -void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); -void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); -void* var_23 = tensorRelu(var_22); -void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); -void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, 
batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); -void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); -void* var_29 = tensorRelu(var_28); -void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); -void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); -void* var_32 = tensorRelu(var_31); -void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); -void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); -void* var_35 = tensorRelu(var_34); -void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); -void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); -void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); -void* var_41 = tensorRelu(var_40); -void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); -void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); -void* var_44 = tensorRelu(var_43); -void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); -void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); -void* var_47 = tensorRelu(var_46); -void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); -void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); -void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); -void* var_53 = tensorRelu(var_52); -void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); -void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, 
batch_normalization_19_variance, 0.001); -void* var_56 = tensorRelu(var_55); -void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); -void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); -void* var_59 = tensorRelu(var_58); -void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); -void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); -void* var_62 = tensorRelu(var_61); -void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); -void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); -void* var_65 = tensorRelu(var_64); -void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); -void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); -void* var_68 = tensorRelu(var_67); -void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); -void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); -void* var_71 = tensorRelu(var_70); -void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); -void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); -void* var_74 = tensorRelu(var_73); -void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); -void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); -void* var_77 = tensorRelu(var_76); -void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); -void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); -void* var_80 = tensorRelu(var_79); -void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2); -void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); -void* var_83 = tensorSoftmax(var_82); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_83); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -} - -dumpExecutionAccuracies(); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc deleted file mode 100644 index c3f11e1f2ff7f0a255d40ecd5916fbdada2b0be3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_quant.cc +++ /dev/null @@ -1,210 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 1; - - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + 
std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = 
readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + 
std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.340709443449974, 1.3555025291442875, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -7.2273098745346065, 8.197232282638556, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.053754347324407, conv2d_2_w, 
-1.1412922372817993, 0.9433415410518639, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.87497807598114, 5.3558874282836655, 9); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.144686742782682, conv2d_3_w, -0.6453772538900375, 0.6694499132037164, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.349411018371582, 4.33332164001466, 9); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 2, 2, 1, 64); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.248231422424324, conv2d_4_w, -0.44596208560466766, 0.49276923143864204, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3628717079162596, 3.058014160633088, 9); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.569213481903169, conv2d_5_w, -0.3239764194488525, 0.2983359285593033, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.473401127815246, 4.425663429260224, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.728998669862753, conv2d_6_w, -0.2290773878991604, 0.22830345794558554, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.1163714165687564, 2.065946404457088, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorPooling(var_32,1,2,2,0,0,2,2); - void* var_34 = FCLayer_PROMISE(var_33, 0.0, 2.191649281263443, dense_1_w, -0.3627079802751541, 0.3849146918058397, dense_1_b, -0.37858343, 0.377391, -1, -11.424064125061035, 18.695249080657973, 9); - void* var_35 = tensorSoftmax(var_34); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_35); - final_accuracy += accuracy; - 
freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -}
diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc
deleted file mode 100644
index ba2a14d990a7b7d3d1cc6ad4bc818b2c199a0c6b..0000000000000000000000000000000000000000
--- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_shallow_valid.cc
+++ /dev/null
@@ -1,235 +0,0 @@
- -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 40; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 4000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../../keras/data/mobilenet_shallow_nathan/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); -
std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = 
readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + 
std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); 
- void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = (i * batch_size) + 4000; - int end = ((i + 1) * batch_size) + 4000; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -1.5164621164798737, 1.6472081774473288, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -9.868980642318725, 10.560956018447879, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 6.821381127357554, conv2d_2_w, -1.1834390873908995, 1.2731596627235617, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -9.875998497009277, 7.51305247974393, 9); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.826067455768602, conv2d_3_w, -0.599876856982708, 0.6812073457241064, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.633289833068848, 5.177892235755925, 9); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, 
batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.02646304416659, conv2d_4_w, -0.4555967862010002, 0.4942613914608956, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.316803941726685, 4.605850250244146, 9); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 4.532649063110355, conv2d_5_w, -0.35657615590095515, 0.3382165088057521, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.1012511816024775, 4.3630500688553, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 3.9874704387188977, conv2d_6_w, -0.28502783328294756, 0.28604640334844594, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.243851703643799, 3.486250406742097, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 6.563065901756522, conv2d_7_w, -0.18946402323246003, 0.19012390717864017, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.938115713119507, 3.538363476753238, 9); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,1,2,2,0,0,2,2); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 1.8908388000727185, dense_1_w, -0.35140394401550296, 0.422872786462307, dense_1_b, -0.23878151, 0.26507422, -1, -14.630816223144532, 27.27252123260504, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc deleted file mode 100644 index 
8cf22349346230889f9a4f1385b94ceabe04612e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/mobilenet_valid.cc +++ /dev/null @@ -1,418 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int total_runs = 40; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 4000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - - std::string dir_prefix = std::string("../../keras/data/mobilenet_quant/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + 
std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = 
readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = 
dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = 
readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = 
readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - 
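// The evaluation loop further down in this file (and in the sibling *_promise source)
// follows a fixed skeleton: each batch is read starting at offset 4000, which appears to
// reserve the first 4000 images for calibration, per-batch accuracies are accumulated, and
// the mean is dumped at the end. A minimal sketch of that skeleton, assuming the
// readInputBatch / readLabelsBatch / computeAccuracy2 signatures used below; runNetwork is
// a hypothetical stand-in for the hand-written layer sequence, not a runtime function.
void* runNetwork(void* input);  // hypothetical placeholder for the inlined layer calls

float evaluateBatches(const std::string& input_path, const std::string& labels_path,
                      int batch_count, int batch_size) {
  float final_accuracy = 0.0;
  for (int i = 0; i < batch_count; i++) {
    int start = (i * batch_size) + 4000;   // skip the 4000-image calibration slice
    int end = ((i + 1) * batch_size) + 4000;
    void* input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);
    void* output = runNetwork(input);
    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end);
    final_accuracy += computeAccuracy2(labels, batch_size, output);
    freeBatchMemory();
  }
  return final_accuracy / batch_count;
}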
std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string 
conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); - void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + 
std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - int start = (i * batch_size) + 4000; - int end = ((i + 1) * batch_size) + 4000; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - 
void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9); - void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_29 = tensorRelu(var_28); - void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9); - void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_32 = tensorRelu(var_31); - void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, 
batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_41 = tensorRelu(var_40); - void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9); - void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_44 = tensorRelu(var_43); - void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_47 = tensorRelu(var_46); - void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9); - void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9); - void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9); - void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); - void* var_62 = tensorRelu(var_61); - void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, 
batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_71 = tensorRelu(var_70); - void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9); - void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_74 = tensorRelu(var_73); - void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_77 = tensorRelu(var_76); - void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9); - void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_80 = tensorRelu(var_79); - void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2); - void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9); - void* var_83 = tensorSoftmax(var_82); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_83); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc deleted file mode 100644 index f696bbf259b26eb4e45b73aa05658f3208c6fae6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_promise.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = 
readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* motionblur_out = ConvLayer_PROMISE(emboss_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - //dumpOutput(result); - - computePSNRViolation(golden_output, result, PSNR); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc deleted file mode 100644 index a8d049f7aca85fbc00e7bcd2b47c083d4f6ea377..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEMO_valid.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Motion Blur - Outline ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEMO_valid.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* motionblur_out = ConvLayer_PROMISE(emboss_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - //dumpOutput(result, "GEMO_valid.bin"); - - computePSNRViolation(golden_output, result, PSNR); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc deleted file mode 100644 index 26ab88f81a70e032a723537864fb5eb4fead6a5b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_promise.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_calib.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = 
readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* outline_out = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - - computePSNRViolation(golden_output, result, PSNR); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc deleted file mode 100644 index 1fd996f701664358d408f3b7e7a095a66c78f9ef..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEOM_valid.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Outline - Motion Blur ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEOM_valid.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin", - float_type, - test_batch_size, 1, H, W); - - void* 
gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* outline_out = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - //dumpOutput(result, "GEOM_valid.bin"); - - computePSNRViolation(golden_output, result, PSNR); - - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc deleted file mode 100644 index 577145a01eb3e5e941588fbfddf153c78156dc0c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_promise.cc +++ /dev/null @@ -1,139 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_calib.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - dumpOutput(result, "GEO_approx.bin"); - - computePSNRViolation(golden_output, result, PSNR); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc deleted file mode 100644 index 6f269a8ad51734e372ce14dd5c5b94342417e2b2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GEO_valid.cc +++ /dev/null @@ -1,143 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Emboss - Outline ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GEO_valid.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin", - float_type, - test_batch_size, 1, H, W); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = 
readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* emboss_out = ConvLayer_PROMISE(gaussian_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(emboss_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - - - hpvm_request_tensor(result, 0); - dumpOutput(result, "GEO_40_psnr.bin"); - - computePSNRViolation(golden_output, result, PSNR); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc deleted file mode 100644 index d6150a9ee4b5d99eaa736a911e0fc9da2d593150..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_promise.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_calib.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = 
readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* motionblur_out = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - hpvm_request_tensor(result, 0); - //dumpOutput(result); - - computePSNRViolation(golden_output, result, PSNR); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc deleted file mode 100644 index 8c2a1b9f63d41654bf6425a5670cb41fd64b64cf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSME_valid.cc +++ /dev/null @@ -1,146 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur - Emboss ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSME_valid.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* motionblur_out = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - hpvm_request_tensor(result, 0); - dumpOutput(result, "GSME_valid_20db.bin"); - - computePSNRViolation(golden_output, result, PSNR); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc deleted file mode 100644 index 59077e94a918a8d5540b713c08af5eb6e73cb86f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_promise.cc +++ /dev/null @@ -1,138 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSM_calib.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/calibration_4572.bin", - float_type, - test_batch_size, 1, H, W); - - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* 
emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - dumpOutput(result, "GSM_approx.bin"); - - computePSNRViolation(golden_output, result, PSNR); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc deleted file mode 100644 index 45b38e82864b97be220eecbe91ce3d6bfdce6318..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_GSM_valid.cc +++ /dev/null @@ -1,141 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Sharpen - Motion Blur ********** \n"); - - int test_batch_size = 1000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/GSM_valid.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/test_4573.bin", - float_type, - test_batch_size, 1, H, W); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew 
Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("promise_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* sharpen_out = ConvLayer_PROMISE(gaussian_out, 0, 255, sharpen_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(sharpen_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. 
- // Will have to rerun to generate golden output - 9); - - - hpvm_request_tensor(result, 0); - dumpOutput(result, "GSM_valid_30db.bin"); - - computePSNRViolation(golden_output, result, PSNR); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc deleted file mode 100644 index 7dd70134731c4fc13d6b7ca239f0566942c02885..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/pipeline_promise.cc +++ /dev/null @@ -1,149 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testPipeline(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - printf("********* Pipeline: Gaussian - Outline - Motion Blur - Emboss ********** \n"); - - int test_batch_size = 2000; - int H = 240; - int W = 300; - float PSNR = 30; - - void* golden_output = readTrainedWeights("../model_params/pipeline/golden_output/caltech-G-O-M-E-FP32-clipped-2000.bin", - float_type, - test_batch_size, 1, H, W); - - clearTensorMap(); - for(int i = 0; i < total_runs; i++){ - void* input = readTrainedWeights("../model_params/pipeline/dataset/caltech101_255_float32.bin", - float_type, - test_batch_size, 1, H, W); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* gaussian_filter = readTrainedWeights("../model_params/pipeline/filters/GaussianFilter.bin", - float_type, 1, 1, 9, 9); - void* outline_filter = readTrainedWeights("../model_params/pipeline/filters/OutlineFilter.bin", - float_type, 1, 1, 3, 3); - void* sharpen_filter = readTrainedWeights("../model_params/pipeline/filters/SharpenFilter.bin", - float_type, 1, 1, 3, 3); - void* motionblur_filter = readTrainedWeights("../model_params/pipeline/filters/MotionblurFilter.bin", - float_type, 1, 1, 9, 9); - void* emboss_filter = readTrainedWeights("../model_params/pipeline/filters/EmbossFilter.bin", - float_type, 1, 1, 5, 5); - void* emboss_bias = readTrainedWeights("../model_params/pipeline/filters/EmbossBias.bin", - float_type, 1, 1, 1, 1); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - - void* gaussian_out = ConvLayer_PROMISE(input, 0, 255, gaussian_filter, 0, 
1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - printf("Gaussian done\n"); - void* outline_out = ConvLayer_PROMISE(gaussian_out, 0, 255, outline_filter, -1, 8, NULL, 0, 0, - 1, 1, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* motionblur_out = ConvLayer_PROMISE(outline_out, 0, 255, motionblur_filter, 0, 1, NULL, 0, 0, - 4, 4, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - void* result = ConvLayer_PROMISE(motionblur_out, 0, 255, emboss_filter, -1, 1, emboss_bias, 128, 128, - 2, 2, 1, 1, - 0, 0, // pool? no pooling needed - 2, - 0, 255, // out min max? should we assume 0 - 255 for all filters. - // Will have to rerun to generate golden output - 9); - - hpvm_request_tensor(result, 0); - dumpOutput(result); - - computePSNRViolation(golden_output, result, PSNR); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - } -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testPipeline(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc deleted file mode 100644 index 7b7f989c16d9203778a602bc03b79a5d41c7a3ba..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_promise_relu.cc +++ /dev/null @@ -1,162 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - - -std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/"); -std::string input_path = dir_prefix + std::string("input.bin"); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -std::string labels_path = dir_prefix + std::string("labels.bin"); -uint8_t* labels = readLabels(labels_path.c_str(),10000); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_3_b_path = dir_prefix + 
std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); -void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); -std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); -void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_15_w_path = dir_prefix + 
std::string("conv2d_15_w.bin"); -void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); -std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); -void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); -void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); -std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); -void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); -void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); -void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); -void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); -void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); -void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); -void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); -void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); -void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); -void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); -void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - -void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -0.71850556, 0.79279953, conv2d_1_b, -0.2551266, 0.14472985, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.2546353, 9); -void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 2.2546353, conv2d_2_w, -0.5433847, 0.5556715, conv2d_2_b, -0.19323121, 0.20603828, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.6603086, 9); -void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 3.6603086, conv2d_3_w, -0.541787, 0.51889443, conv2d_3_b, -0.2030649, 0.21818772, 1, 1, 1, 1, -1, 0, -1, -5.471612, 5.295037, 9); -void* var_3 = tensorAdd(var_0, var_2); -void* var_4 = tensorRelu(var_3); -void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 6.738059, conv2d_4_w, -0.691922, 0.3410589, conv2d_4_b, -0.5095374, 0.18683507, 1, 1, 1, 1, -1, 0, 1, 0.0, 5.2085133, 9); -void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.2085133, conv2d_5_w, -0.40904462, 0.39255425, conv2d_5_b, -0.2069035, 0.117769495, 1, 1, 1, 1, -1, 0, -1, -5.6378636, 6.844163, 9); -void* var_7 = tensorAdd(var_4, var_6); -void* var_8 = tensorRelu(var_7); -void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 8.4156885, conv2d_6_w, -0.38497055, 
0.3736088, conv2d_6_b, -0.14458452, 0.18792383, 1, 1, 1, 1, -1, 0, 1, 0.0, 6.5020022, 9); -void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 6.5020022, conv2d_7_w, -0.30858195, 0.4282964, conv2d_7_b, -0.1807645, 0.07482771, 1, 1, 1, 1, -1, 0, -1, -3.3083274, 5.364109, 9); -void* var_11 = tensorAdd(var_8, var_10); -void* var_12 = tensorRelu(var_11); -void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 9.382513, conv2d_8_w, -0.5838584, 0.44527876, conv2d_8_b, -0.2637087, 0.22768898, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.158108, 9); -void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 12.158108, conv2d_9_w, -0.46162197, 0.42936426, conv2d_9_b, -0.1289545, 0.51804763, 1, 1, 1, 1, -1, 0, -1, -17.15394, 12.169734, 9); -void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 9.382513, conv2d_10_w, -0.69971406, 0.99415976, conv2d_10_b, -0.1289545, 0.51804763, 0, 0, 2, 2, -1, 0, -1, -5.418469, 11.448848, 9); -void* var_16 = tensorAdd(var_15, var_14); -void* var_17 = tensorRelu(var_16); -void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 18.46502, conv2d_11_w, -0.43470153, 0.36867705, conv2d_11_b, -0.51738244, 0.15350178, 1, 1, 1, 1, -1, 0, 1, 0.0, 13.211603, 9); -void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 13.211603, conv2d_12_w, -0.29948497, 0.38820583, conv2d_12_b, -0.37389848, 0.26664862, 1, 1, 1, 1, -1, 0, -1, -10.085186, 13.555471, 9); -void* var_20 = tensorAdd(var_17, var_19); -void* var_21 = tensorRelu(var_20); -void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 22.695429, conv2d_13_w, -0.44317818, 0.30531815, conv2d_13_b, -0.36851564, 0.06573071, 1, 1, 1, 1, -1, 0, 1, 0.0, 19.886229, 9); -void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 19.886229, conv2d_14_w, -0.3271309, 0.33153397, conv2d_14_b, -0.38927156, 0.066472165, 1, 1, 1, 1, -1, 0, -1, -8.295334, 15.001421, 9); -void* var_24 = tensorAdd(var_21, var_23); -void* var_25 = tensorRelu(var_24); -void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 28.637527, conv2d_15_w, -0.44983515, 0.43999374, conv2d_15_b, -0.21998975, 0.36213604, 1, 1, 2, 2, -1, 0, 1, 0.0, 44.106163, 9); -void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 44.106163, conv2d_16_w, -0.4508994, 0.41697323, conv2d_16_b, -0.27649263, 0.42242092, 1, 1, 1, 1, -1, 0, -1, -47.52727, 75.15572, 9); -void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 28.637527, conv2d_17_w, -0.57827795, 0.7829617, conv2d_17_b, -0.27649248, 0.42242065, 0, 0, 2, 2, -1, 0, -1, -8.998529, 10.628808, 9); -void* var_29 = tensorAdd(var_28, var_27); -void* var_30 = tensorRelu(var_29); -void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 77.00688, conv2d_18_w, -0.37020415, 0.4076619, conv2d_18_b, -0.572569, 0.288411, 1, 1, 1, 1, -1, 0, 1, 0.0, 58.209835, 9); -void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 58.209835, conv2d_19_w, -0.40217596, 0.4803875, conv2d_19_b, -0.84837836, 0.41470897, 1, 1, 1, 1, -1, 0, -1, -61.702118, 45.982677, 9); -void* var_33 = tensorAdd(var_30, var_32); -void* var_34 = tensorRelu(var_33); -void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 98.688995, conv2d_20_w, -0.41761914, 0.4584275, conv2d_20_b, -0.335136, 0.42988807, 1, 1, 1, 1, -1, 0, 1, 0.0, 95.06278, 9); -void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 95.06278, conv2d_21_w, -0.32336038, 0.35931262, conv2d_21_b, -0.645176, 0.45402992, 1, 1, 1, 1, -1, 0, -1, -116.62798, 127.2517, 9); -void* var_37 = tensorAdd(var_34, var_36); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); -void* var_40 = FCLayer_PROMISE(var_39, 0.0, 22.762705, dense_1_w, -0.876813, 0.6065728, dense_1_b, -0.36824417, 0.25160706, -1, -30.914663, 49.802082, 9); -void* var_41 = 
tensorSoftmax(var_40); - -computeAccuracy2(labels,10000,var_41); - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc deleted file mode 100644 index 63aef3744fabc598ccc6653534074283edecef03..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/resnet18_valid.cc +++ /dev/null @@ -1,189 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 20; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int offset = 0; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); 
- void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); 
- std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -0.5500815, 0.60786617, conv2d_1_w, -1.0248864, 1.2929907, conv2d_1_b, -0.36291853, 0.2533059, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.8791630274057383, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 0.8791630274057383, conv2d_2_w, -0.69884616, 0.71849966, conv2d_2_b, -0.2781147, 0.45571187, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1859495645761484, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 1.1859495645761484, conv2d_3_w, -0.59568167, 0.7714691, conv2d_3_b, -0.8602873, 0.19743633, 1, 1, 1, 1, -1, 0, -1, -2.2316832554340365, 2.266301159858699, 9); - void* var_3 = tensorAdd(var_0, var_2); - void* var_4 = tensorRelu(var_3); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.789569139480591, conv2d_4_w, -0.41976976, 0.43748936, conv2d_4_b, -0.7021962, 0.3033103, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3341254055499974, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 1.3341254055499974, conv2d_5_w, -0.46757826, 0.4635873, conv2d_5_b, -0.20662616, 0.1778044, 1, 1, 1, 1, -1, 0, -1, -0.9912706619501114, 1.0245310074090952, 9); - void* var_7 = tensorAdd(var_4, var_6); - void* var_8 = tensorRelu(var_7); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 2.998989346027372, conv2d_6_w, -0.64404047, 0.45383143, conv2d_6_b, -0.819547, 0.38550296, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2850778144597967, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 1.2850778144597967, conv2d_7_w, -0.41986948, 0.33654243, conv2d_7_b, -0.3563013, 0.22371122, 1, 1, 1, 1, -1, 0, -1, -1.2940701305866242, 0.7332147359848022, 9); - void* var_11 = tensorAdd(var_8, var_10); - void* var_12 = tensorRelu(var_11); - void* var_13 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_8_w, -0.4805263, 0.50655717, conv2d_8_b, -0.296758, 0.7742441, 1, 1, 2, 2, -1, 0, 1, 0.0, 3.6232483506202584, 9); - void* var_14 = ConvLayer_PROMISE(var_13, 0.0, 3.6232483506202584, conv2d_9_w, -0.52083415, 0.45517674, conv2d_9_b, -0.20242067, 0.8236838, 1, 1, 1, 1, -1, 0, -1, -6.319877154827118, 6.882811555862418, 9); - void* var_15 = ConvLayer_PROMISE(var_12, 0.0, 2.8626382386684384, conv2d_10_w, -0.5338656, 1.3395424, conv2d_10_b, -0.20242067, 0.8236838, 0, 0, 2, 2, -1, 0, -1, -0.9930689406394959, 2.8721754658222096, 9); - void* var_16 = tensorAdd(var_15, var_14); - void* var_17 = tensorRelu(var_16); - void* 
var_18 = ConvLayer_PROMISE(var_17, 0.0, 8.315858840942383, conv2d_11_w, -0.34429058, 0.43629733, conv2d_11_b, -1.0744808, 0.056708273, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.6893706333637226, 9); - void* var_19 = ConvLayer_PROMISE(var_18, 0.0, 2.6893706333637226, conv2d_12_w, -0.30342352, 0.39493486, conv2d_12_b, -0.44630566, 0.6492069, 1, 1, 1, 1, -1, 0, -1, -1.8801953810453416, 1.714934362173068, 9); - void* var_20 = tensorAdd(var_17, var_19); - void* var_21 = tensorRelu(var_20); - void* var_22 = ConvLayer_PROMISE(var_21, 0.0, 8.381670951843262, conv2d_13_w, -0.38351893, 0.45775774, conv2d_13_b, -1.4733055, -0.014426912, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.569231034517287, 9); - void* var_23 = ConvLayer_PROMISE(var_22, 0.0, 2.569231034517287, conv2d_14_w, -0.25695276, 0.45372736, conv2d_14_b, -0.5259744, 0.26591402, 1, 1, 1, 1, -1, 0, -1, -1.9701244848966597, 1.4661400413513093, 9); - void* var_24 = tensorAdd(var_21, var_23); - void* var_25 = tensorRelu(var_24); - void* var_26 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_15_w, -0.55299705, 0.5443531, conv2d_15_b, -0.71790683, 1.2730768, 1, 1, 2, 2, -1, 0, 1, 0.0, 12.411911067962677, 9); - void* var_27 = ConvLayer_PROMISE(var_26, 0.0, 12.411911067962677, conv2d_16_w, -0.4203967, 0.48641303, conv2d_16_b, -0.90653443, 1.3546854, 1, 1, 1, 1, -1, 0, -1, -25.407194147109987, 20.519153985977383, 9); - void* var_28 = ConvLayer_PROMISE(var_25, 0.0, 8.188224797248836, conv2d_17_w, -0.4365755, 0.84913826, conv2d_17_b, -0.90653443, 1.3546851, 0, 0, 2, 2, -1, 0, -1, -4.256520752906799, 5.730506427288059, 9); - void* var_29 = tensorAdd(var_28, var_27); - void* var_30 = tensorRelu(var_29); - void* var_31 = ConvLayer_PROMISE(var_30, 0.0, 22.350475664138983, conv2d_18_w, -0.38657624, 0.5228989, conv2d_18_b, -1.2083547, 0.76361173, 1, 1, 1, 1, -1, 0, 1, 0.0, 23.93387042045599, 9); - void* var_32 = ConvLayer_PROMISE(var_31, 0.0, 23.93387042045599, conv2d_19_w, -0.40857902, 0.575035, conv2d_19_b, -1.8731614, 1.0960501, 1, 1, 1, 1, -1, 0, -1, -35.37134181976318, 19.209569931030273, 9); - void* var_33 = tensorAdd(var_30, var_32); - void* var_34 = tensorRelu(var_33); - void* var_35 = ConvLayer_PROMISE(var_34, 0.0, 29.434949998855657, conv2d_20_w, -0.33079496, 0.5893278, conv2d_20_b, -1.0234511, 1.0016295, 1, 1, 1, 1, -1, 0, 1, 0.0, 27.216757345199866, 9); - void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 27.216757345199866, conv2d_21_w, -0.27897888, 0.38280907, conv2d_21_b, -2.2086356, 1.0066502, 1, 1, 1, 1, -1, 0, -1, -42.31447326660156, 29.365212144852038, 9); - void* var_37 = tensorAdd(var_34, var_36); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,1,8,8,0,0,8,8); - void* var_40 = FCLayer_PROMISE(var_39, 0.0, 13.736315393447876, dense_1_w, -1.5092047, 1.0279838, dense_1_b, -0.49379802, 0.61032647, -1, -45.52749088287353, 31.64324799537669, 9); - void* var_41 = tensorSoftmax(var_40); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_41); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc deleted file mode 100644 index 
bbc247fc46fa553a2d8fb479c77023c9960375d6..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_promise_quant.cc +++ /dev/null @@ -1,133 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - int total_runs = 1; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); 
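Every tensor in the deleted sources above and below is loaded with the same two-step pattern: build the file path from dir_prefix, then call readTrainedWeights() with the NCHW dimensions. A small helper such as the hypothetical sketch below (loadWeights is not part of the original code) captures that pattern; it assumes the readTrainedWeights(path, type, N, C, H, W) call shape used throughout these files, with readTrainedWeights declared by the included tensor_runtime.h header.

  #include <string>

  // Hypothetical convenience wrapper (not in the original sources) around the
  // repeated path-building + readTrainedWeights() pattern seen in these files.
  // The second argument mirrors the literal 0 passed as the data-type flag in
  // the original calls.
  static void* loadWeights(const std::string& dir_prefix, const std::string& name,
                           int n, int c, int h, int w) {
    std::string path = dir_prefix + name;   // e.g. dir_prefix + "conv2d_9_w.bin"
    return readTrainedWeights(path.c_str(), 0, n, c, h, w);
  }

  // Usage equivalent to the surrounding statements:
  //   void* conv2d_9_w = loadWeights(dir_prefix, "conv2d_9_w.bin", 512, 512, 3, 3);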
- void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 
2.550416946411133, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 2.8148007798194876, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc deleted file mode 100644 index 21eb3ba3e09a7a8ef5ae8940d4c60501ac01abe1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_top5_valid.cc +++ /dev/null @@ -1,136 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(2); - - int total_runs = 20; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int offset = 0; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + 
std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 
0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, -1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 
0.47405547, 1, 0.0, 2.8148007798194876, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - //-- float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); - float accuracy = computeTop5Accuracy(labels, batch_size, var_15, 100); - - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc deleted file mode 100644 index b78c506e618535be50e92d9e77a59ecdd793d720..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar100_valid.cc +++ /dev/null @@ -1,134 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 20; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int offset = 0; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + 
std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.7829767, 1.9456929, conv2d_1_w, -0.7450515, 0.71249133, conv2d_1_b, -1.5885142, 0.275554, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.7384350299835205, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.7384350299835205, conv2d_2_w, -0.30790088, 0.43504623, conv2d_2_b, 
-1.4242363, 1.2602744, 1, 1, 1, 1, 0, 2, 1, 0.0, 4.417154796123498, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 4.417154796123498, conv2d_3_w, -0.29189092, 0.26958522, conv2d_3_b, -1.0527138, 0.9075671, 1, 1, 1, 1, -1, 0, 1, 0.0, 3.1919608163833573, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 3.1919608163833573, conv2d_4_w, -0.15521508, 0.1829038, conv2d_4_b, -0.845419, 1.9358484, 1, 1, 1, 1, 0, 2, 1, 0.0, 5.108994026184064, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 5.108994026184064, conv2d_5_w, -0.13149762, 0.14811686, conv2d_5_b, -0.7162557, 1.0370971, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.8264513099193493, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 2.8264513099193493, conv2d_6_w, -0.06236292, 0.08321518, conv2d_6_b, -0.9067523, 0.9922458, 1, 1, 1, 1, -1, 0, 1, 0.0, 2.507186658382409, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 2.507186658382409, conv2d_7_w, -0.06471479, 0.1024472, conv2d_7_b, -0.15943134, 0.7988499, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.550416946411133, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 2.550416946411133, conv2d_8_w, -0.06320205, 0.08291938, conv2d_8_b, -0.32540628, 0.5203079, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7303829237818675, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.7303829237818675, conv2d_9_w, -0.037707984, 0.051601283, conv2d_9_b, -0.25622904, 0.11251946, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.32286912292241965, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.32286912292241965, conv2d_10_w, -0.056007143, 0.09549151, conv2d_10_b, -0.11591503, 0.06267536, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.47936276525258825, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.47936276525258825, conv2d_11_w, -0.060094673, 0.10868926, conv2d_11_b, -0.105962686, 0.09584572, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.6409912902116734, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.6409912902116734, conv2d_12_w, -0.034618977, 0.05792674, conv2d_12_b, -0.4237576, 0.11035452, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1027569955587349, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1027569955587349, conv2d_13_w, -0.035480656, 0.058295887, conv2d_13_b, -0.21477045, 0.14263579, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.4708798038959503, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.4708798038959503, dense_1_w, -0.08929961, 0.11301676, dense_1_b, -0.20798548, 0.47405547, 1, 0.0, 2.8148007798194876, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.8148007798194876, dense_2_w, -0.6627122, 0.35539475, dense_2_b, -1.0631907, 0.9830786, -1, -21.189617557525633, 22.645009384155276, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc deleted file mode 100644 index fbaea86634e2b561f78fd3971a731b1a734dcbaf..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/promise/vgg16_cifar10_valid.cc +++ /dev/null @@ -1,137 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include 
"../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - int total_runs = 20; - for (int i = 0 ; i < total_runs; i++){ - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int offset = 0; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = 
readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = ConvLayer_PROMISE(input, -1.8816367, 2.0934217, conv2d_1_w, -0.53275156, 0.49437004, conv2d_1_b, -0.6403629, 0.2490165, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.3590874671936035, 9); - void* var_1 = ConvLayer_PROMISE(var_0, 0.0, 1.3590874671936035, conv2d_2_w, -0.2688396, 0.20639156, conv2d_2_b, -0.7745511, 0.82006615, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.521231179237361, 9); - void* var_2 = ConvLayer_PROMISE(var_1, 0.0, 2.521231179237361, conv2d_3_w, -0.16776876, 0.14878987, conv2d_3_b, -0.35283303, 0.5154362, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.2011985784769053, 9); - void* var_3 = ConvLayer_PROMISE(var_2, 0.0, 1.2011985784769053, conv2d_4_w, -0.088948585, 0.114222586, conv2d_4_b, -0.30250227, 0.36856708, 1, 1, 1, 1, 0, 2, 1, 0.0, 1.0359880930185312, 9); - void* var_4 = ConvLayer_PROMISE(var_3, 0.0, 1.0359880930185312, conv2d_5_w, -0.07739562, 0.10973293, conv2d_5_b, -0.15568458, 0.17634983, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.3004955950379369, 9); - void* var_5 = ConvLayer_PROMISE(var_4, 0.0, 0.3004955950379369, conv2d_6_w, -0.051649556, 0.05435231, conv2d_6_b, -0.07395447, 0.07996062, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.11490475405007583, 9); - void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 0.11490475405007583, conv2d_7_w, -0.043513633, 0.07577866, conv2d_7_b, -0.06921874, 0.02660573, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.16232508487999475, 9); - void* var_7 = ConvLayer_PROMISE(var_6, 0.0, 0.16232508487999475, conv2d_8_w, -0.033842053, 0.045218028, conv2d_8_b, -0.022827804, 0.023845317, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.12424996573477909, 9); - void* var_8 = ConvLayer_PROMISE(var_7, 0.0, 0.12424996573477909, 
conv2d_9_w, -0.02211613, 0.032084666, conv2d_9_b, -0.02699063, 0.03773564, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.1746344865113496, 9); - void* var_9 = ConvLayer_PROMISE(var_8, 0.0, 0.1746344865113496, conv2d_10_w, -0.01979376, 0.034854397, conv2d_10_b, -0.036107242, 0.07056531, 1, 1, 1, 1, 0, 2, 1, 0.0, 0.5751757621765137, 9); - void* var_10 = ConvLayer_PROMISE(var_9, 0.0, 0.5751757621765137, conv2d_11_w, -0.03452098, 0.046055835, conv2d_11_b, -0.051925894, 0.07039055, 1, 1, 1, 1, -1, 0, 1, 0.0, 0.7718751144409115, 9); - void* var_11 = ConvLayer_PROMISE(var_10, 0.0, 0.7718751144409115, conv2d_12_w, -0.025946895, 0.040090334, conv2d_12_b, -0.06049362, 0.12658806, 1, 1, 1, 1, -1, 0, 1, 0.0, 1.1728516906499844, 9); - void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 1.1728516906499844, conv2d_13_w, -0.021766115, 0.03315237, conv2d_13_b, -0.20705001, 0.117947325, 1, 1, 1, 1, 0, 2, 1, 0.0, 2.0015769386291495, 9); - void* var_13 = FCLayer_PROMISE(var_12, 0.0, 2.0015769386291495, dense_1_w, -0.042597745, 0.046707444, dense_1_b, -0.21937433, 0.2545502, 1, 0.0, 2.002361118793486, 9); - void* var_14 = FCLayer_PROMISE(var_13, 0.0, 2.002361118793486, dense_2_w, -0.32550547, 0.30829763, dense_2_b, -1.1787822, 1.2378151, -1, -18.251470546722413, 24.17363445281988, 9); - void* var_15 = tensorSoftmax(var_14); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_15); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc deleted file mode 100644 index ecbfa322e9a26159e8a0bb6dad7b9cfa75f53711..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_approxhalf.cc +++ /dev/null @@ -1,154 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = 
readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - // Perforates 50% filter elements and corresponding computations - void* conv1out = tensorConvApproxHalf2(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 2, 1); - - - tensorAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorTanh(conv1out); - - // NOTE: Perforates 50% rows in the output - // 2nd Layer - void* conv2out = tensorConvApproxHalf2(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 2, 1, 1, 1); - tensorAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorTanh(conv2out); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // NOTE: No Approxmation - all 1s passed - // 3rd Layer - void* conv3out = tensorConvApproxHalf2(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorTanh(conv3out); - - // NOTE: No Approxmation - all 1s passed - // 4th Layer - void* conv4out = tensorConvApproxHalf2(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorTanh(conv4out); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - // NOTE: No Approxmation - all 1s passed - // 5th Layer - void* conv5out = tensorConvApproxHalf2(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorTanh(conv5out); - - // NOTE: No Approxmation - all 1s passed - // 6th Layer - void* conv6out = tensorConvApproxHalf2(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv6out, conv6_bias); - - void* conv6_tanh = tensorTanh(conv6out); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / 
batch_count; - dumpFinalAccuracy(final_accuracy); - -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc deleted file mode 100644 index a0ac48b5ef41002760c2ad6cd8882639e98f1699..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet2_sampsim.cc +++ /dev/null @@ -1,148 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* conv1out = tensorConvSampSim(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 2, 0); - - - tensorAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorTanh(conv1out); - - // 2nd Layer - void* conv2out = tensorConvApproxHalf(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorTanh(conv2out); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 3rd Layer - void* conv3out = tensorConvApproxHalf(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvApproxHalf(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorTanh(conv4out); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 5th Layer - void* conv5out = tensorConvApproxHalf(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorTanh(conv5out); - - // 6th Layer - void* conv6out = tensorConvApproxHalf(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 1, 1, 1, 1); - tensorAdd(conv6out, conv6_bias); - - void* conv6_tanh = tensorTanh(conv6out); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - -} - - -int main(int argc, char* argv[]){ - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc deleted file mode 100644 index 7713b8105ac0f9bc6f1dae6899548599e5ede0ce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_approx.cc +++ /dev/null @@ -1,196 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - int total_runs = 100; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* CIFAR-10 DNN ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 1000; //5000 - - //uint8_t* labels = 
readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); - uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvPerf(input, conv1_filter, 5, 5, 1, 1, - conv_mode, conv_precision, 0, 0); - - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* conv1_tanh = tensorTanh(conv1out); - - void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 2nd Layer - void* conv2out = tensorConvPerf(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision, 0, 0); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* conv2_tanh = tensorTanh(conv2out); - - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 3rd Layer - void* conv3out = tensorConvPerf(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 0, 0); - tensorAdd(conv3out, conv3_bias); // NOTE: In place operation - - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvPerf(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 2, 0); - tensorAdd(conv4out, conv4_bias); // NOTE: In place operation - - void* conv4_tanh = tensorTanh(conv4out); - - // 5th Layer - void* conv5out = tensorConvPerf(conv4_tanh, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision, 0, 0); - tensorAdd(conv5out, conv5_bias); // NOTE: In place operation - - void* conv5_tanh = tensorTanh(conv5out); - - void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool5out, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* result = tensorSoftmax(gemm1biasout); - - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc deleted file mode 100644 index 3e5cec7d0760252ebff1b31293a51bdf570415f4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/alexnet_cifar10_old.cc +++ /dev/null @@ -1,196 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - int total_runs = 100; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* CIFAR-10 DNN ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); - uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - void* input = 
readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 5, 5, 1, 1, - conv_mode, conv_precision); - - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* conv1_tanh = tensorTanh(conv1out); - - void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 2nd Layer - void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* conv2_tanh = tensorTanh(conv2out); - - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); // NOTE: In place operation - - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); // NOTE: In place operation - - void* conv4_tanh = tensorTanh(conv4out); - - // 5th Layer - void* conv5out = tensorConvolution(conv4_tanh, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv5out, conv5_bias); // NOTE: In place operation - - void* conv5_tanh = tensorTanh(conv5out); - - void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool5out, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* result = tensorSoftmax(gemm1biasout); - - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(1); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc deleted file mode 100644 index 84710565de3b2fdde6eca5d84c9e3f324eba1d50..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/depthwise_batchnorm.cc +++ /dev/null @@ -1,102 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/depthwise_batchnorm2/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_gamma_path = dir_prefix + 
std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string depthwise_conv2d_1_b_path = dir_prefix + std::string("depthwise_conv2d_1_b.bin"); - void* depthwise_conv2d_1_b = readTrainedWeights(depthwise_conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,6272,1024); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 10000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorBatchNorm(var_2,batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_4 = tensorPooling(var_3,0,2,2,0,0,2,2); - void* var_5 = tensorConvolution(var_4, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_6 = tensorAdd(var_5, 
depthwise_conv2d_1_b); - void* var_7 = tensorRelu(var_6); - void* var_8 = tensorBatchNorm(var_7,batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_10 = tensorGemmGPU(var_8, dense_1_w); - void* var_11 = tensorAdd(var_10, dense_1_b); - void* var_12 = tensorRelu(var_11); - void* var_13 = tensorGemmGPU(var_12, dense_2_w); - void* var_14 = tensorAdd(var_13, dense_2_b); - void* var_15 = tensorRelu(var_14); - void* var_16 = tensorSoftmax(var_15); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_16); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc deleted file mode 100644 index d7addd7283e24bedfc32d57d84c4ce17d9966f57..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/fc2_clipped_promise.cc +++ /dev/null @@ -1,80 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" -#include "../../include/types.h" - - - -void test_2_Layer_clipped_FC(){ - - printf("********* 2-Layer FC with clipped activations and weights ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/fc2_clipped/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/fc2_clipped/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/fc2_clipped/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/fc2_clipped/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - printTensorDims(fc1out); - dumpWeightsToFile("tensors_out2/fc1out.out", fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - printTensorDims(fc1_bias_out); - dumpWeightsToFile("tensors_out2/fc1_bias.out", fc1_bias_out); - - void* fc1_relu = tensorRelu2(fc1_bias_out, 0, 2); - printTensorDims(fc1_relu); - dumpWeightsToFile("tensors_out2/fc1_clipped_relu.out", fc1_relu); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - printTensorDims(fc2out); - dumpWeightsToFile("tensors_out2/fc2out.out", fc2out); - - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - printTensorDims(fc2_bias_out); - - void* fc2_relu = tensorRelu2(fc2_bias_out, 0, 2); - printTensorDims(fc2_relu); - - void* result = tensorSoftmax(fc2_relu); - printTensorDims(result); - - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); -} - - - -int main(){ - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - test_2_Layer_clipped_FC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git 
a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc deleted file mode 100644 index d2d663552fdab6366f28655ca835ba63cb4fcee4..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet2_tanh.cc +++ /dev/null @@ -1,171 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetTanh(){ - - int total_runs = 1; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - uint8_t* labels = readLabels("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size); - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_tanh2/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_tanh2/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_tanh2/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_tanh2/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_tanh2/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_tanh2/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_tanh2/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_tanh2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - - void* conv1_tanh = tensorTanh(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - - void* conv2_tanh = tensorTanh(pool2out); - - void* gemm1out = tensorGemmGPU(conv2_tanh, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* tanh1out = tensorTanh(gemm1biasout); - - void* gemm2out = tensorGemmGPU(tanh1out, fc2_weights); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - - void* tanh2out = tensorTanh(gemm2_biasout); - - void* result = tensorSoftmax(tanh2out); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } - - } - - - -} - - -int main(int argc, char* argv[]){ - - if(argc > 1) - Opentuner_run = true; - - llvm_hpvm_initTensorRt(0); - - testLenetTanh(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc deleted file mode 100644 index effb293a8b63119015ed8dbf5f8938eb62f2f89c..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_front.cc +++ /dev/null @@ -1,61 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/lenet_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); - std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias2.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc5.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 
0,1,1,3136,1024); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias5.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("fc6.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - std::string dense_2_b_path = dir_prefix + std::string("fc_bias6.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorTanh(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorTanh(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_9 = tensorGemmGPU(var_7, dense_1_w); - void* var_10 = tensorAdd(var_9, dense_1_b); - void* var_11 = tensorTanh(var_10); - void* var_12 = tensorGemmGPU(var_11, dense_2_w); - void* var_13 = tensorAdd(var_12, dense_2_b); - void* var_14 = tensorTanh(var_13); - void* var_15 = tensorSoftmax(var_14); - - computeAccuracy2(labels, 10000, var_15); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc deleted file mode 100644 index 03a4137004fe063a4536efec8fa7ecf2d8d2b374..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_int32.cc +++ /dev/null @@ -1,91 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../../keras/data/lenet_test_8/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - 
std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 10000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,1,28,28); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 2, 2, 1, 1, 1, 1); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_4 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 1); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_8 = tensorConvolution(var_6, conv2d_3_w, 1, 1, 2, 2, 1, 1); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorGemmGPU(var_10, dense_1_w); - void* var_13 = tensorAdd(var_12, dense_1_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorGemmGPU(var_14, dense_2_w); - void* var_16 = tensorAdd(var_15, dense_2_b); - void* var_17 = tensorRelu(var_16); - void* var_18 = tensorSoftmax(var_17); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_18); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc deleted file mode 100644 index a6b777e36c1b31440a3ad7d227df4915b1cc27df..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/lenet_layers.cc +++ /dev/null @@ -1,61 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -int main(){ - - llvm_hpvm_initializeRuntimeController("tuner_confs.txt", "quant_ranges_rt.txt"); - llvm_hpvm_initApproxhpvmRt(0); - - - std::string dir_prefix = std::string("../model_params/lenet_relu/"); - std::string input_path = dir_prefix + std::string("input.bin"); - void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); - std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_3_b_path = 
dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - void* var_0 = wrapper_ConvLayer("1", input, conv2d_1_w, conv2d_1_b, 2, 2, 1, 1, 0, 2, 1, 0, 0); - void* var_1 = wrapper_ConvLayer("2", var_0, conv2d_2_w, conv2d_2_b, 2, 2, 1, 1, -1, 0, 1, 0, 0); - void* var_2 = wrapper_ConvLayer("3", var_1, conv2d_3_w, conv2d_3_b, 1, 1, 2, 2, -1, 0, 1, 0, 0); - void* var_3 = wrapper_FCLayer("4", var_2, dense_1_w, dense_1_b, 1, 0, 0); - void* var_4 = wrapper_FCLayer("5", var_3, dense_2_w, dense_2_b, 1, 0, 0); - void* var_5 = tensorSoftmax(var_4); - - computeAccuracy2(labels,10000,var_5); - - - llvm_hpvm_cleanupApproxhpvmRt(); - llvm_hpvm_clearRuntimeController(); - - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc deleted file mode 100644 index 646582146e1fd4b4819ee47a071d630428ed7f70..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mio_test.cc +++ /dev/null @@ -1,98 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/hpvm_mio/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w 
= readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1600,256); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,256,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,256,5); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,5,1,1); - - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 0, 0, 1, 1, 1, 1); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_4 = tensorAdd(var_3, conv2d_2_b); - void* var_5 = tensorRelu(var_4); - void* var_6 = tensorPooling(var_5,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_6, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_11 = tensorConvolution(var_10, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_12 = tensorAdd(var_11, conv2d_4_b); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorPooling(var_13,0,2,2,0,0,2,2); - void* var_17 = tensorGemmGPU(var_14, dense_1_w); - void* var_18 = tensorAdd(var_17, dense_1_b); - void* var_19 = tensorRelu(var_18); - void* var_21 = tensorGemmGPU(var_19, dense_2_w); - void* var_22 = tensorAdd(var_21, dense_2_b); - void* var_23 = tensorSoftmax(var_22); - - uint32_t* labels = readLabelsBatch3(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy3(labels, var_23); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc deleted file mode 100644 index ba7af9846916057fedc05757bdad77fefb01590e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_old.cc +++ /dev/null @@ -1,413 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - - std::string dir_prefix = std::string("../model_params/mobilenet_hpvm_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + 
std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = 
readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance = readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = 
dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = 
readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_7_w_path = dir_prefix + std::string("depthwise_conv2d_7_w.bin"); - void* depthwise_conv2d_7_w = readTrainedWeights(depthwise_conv2d_7_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_14_gamma_path = dir_prefix + std::string("batch_normalization_14_gamma.bin"); - void* batch_normalization_14_gamma = readTrainedWeights(batch_normalization_14_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_beta_path = dir_prefix + std::string("batch_normalization_14_beta.bin"); - void* batch_normalization_14_beta = 
readTrainedWeights(batch_normalization_14_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_mean_path = dir_prefix + std::string("batch_normalization_14_mean.bin"); - void* batch_normalization_14_mean = readTrainedWeights(batch_normalization_14_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_14_variance_path = dir_prefix + std::string("batch_normalization_14_variance.bin"); - void* batch_normalization_14_variance = readTrainedWeights(batch_normalization_14_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_15_gamma_path = dir_prefix + std::string("batch_normalization_15_gamma.bin"); - void* batch_normalization_15_gamma = readTrainedWeights(batch_normalization_15_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_beta_path = dir_prefix + std::string("batch_normalization_15_beta.bin"); - void* batch_normalization_15_beta = readTrainedWeights(batch_normalization_15_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_mean_path = dir_prefix + std::string("batch_normalization_15_mean.bin"); - void* batch_normalization_15_mean = readTrainedWeights(batch_normalization_15_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_15_variance_path = dir_prefix + std::string("batch_normalization_15_variance.bin"); - void* batch_normalization_15_variance = readTrainedWeights(batch_normalization_15_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_8_w_path = dir_prefix + std::string("depthwise_conv2d_8_w.bin"); - void* depthwise_conv2d_8_w = readTrainedWeights(depthwise_conv2d_8_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_16_gamma_path = dir_prefix + std::string("batch_normalization_16_gamma.bin"); - void* batch_normalization_16_gamma = readTrainedWeights(batch_normalization_16_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_beta_path = dir_prefix + std::string("batch_normalization_16_beta.bin"); - void* batch_normalization_16_beta = readTrainedWeights(batch_normalization_16_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_mean_path = dir_prefix + std::string("batch_normalization_16_mean.bin"); - void* batch_normalization_16_mean = readTrainedWeights(batch_normalization_16_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_16_variance_path = dir_prefix + std::string("batch_normalization_16_variance.bin"); - void* batch_normalization_16_variance = readTrainedWeights(batch_normalization_16_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_17_gamma_path = dir_prefix + std::string("batch_normalization_17_gamma.bin"); - void* batch_normalization_17_gamma = readTrainedWeights(batch_normalization_17_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_beta_path = dir_prefix + std::string("batch_normalization_17_beta.bin"); - void* batch_normalization_17_beta = readTrainedWeights(batch_normalization_17_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_17_mean_path = dir_prefix + std::string("batch_normalization_17_mean.bin"); - void* batch_normalization_17_mean = readTrainedWeights(batch_normalization_17_mean_path.c_str(), 0,1,512,1,1); - 
std::string batch_normalization_17_variance_path = dir_prefix + std::string("batch_normalization_17_variance.bin"); - void* batch_normalization_17_variance = readTrainedWeights(batch_normalization_17_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_9_w_path = dir_prefix + std::string("depthwise_conv2d_9_w.bin"); - void* depthwise_conv2d_9_w = readTrainedWeights(depthwise_conv2d_9_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_18_gamma_path = dir_prefix + std::string("batch_normalization_18_gamma.bin"); - void* batch_normalization_18_gamma = readTrainedWeights(batch_normalization_18_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_beta_path = dir_prefix + std::string("batch_normalization_18_beta.bin"); - void* batch_normalization_18_beta = readTrainedWeights(batch_normalization_18_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_mean_path = dir_prefix + std::string("batch_normalization_18_mean.bin"); - void* batch_normalization_18_mean = readTrainedWeights(batch_normalization_18_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_18_variance_path = dir_prefix + std::string("batch_normalization_18_variance.bin"); - void* batch_normalization_18_variance = readTrainedWeights(batch_normalization_18_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_19_gamma_path = dir_prefix + std::string("batch_normalization_19_gamma.bin"); - void* batch_normalization_19_gamma = readTrainedWeights(batch_normalization_19_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_beta_path = dir_prefix + std::string("batch_normalization_19_beta.bin"); - void* batch_normalization_19_beta = readTrainedWeights(batch_normalization_19_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_mean_path = dir_prefix + std::string("batch_normalization_19_mean.bin"); - void* batch_normalization_19_mean = readTrainedWeights(batch_normalization_19_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_19_variance_path = dir_prefix + std::string("batch_normalization_19_variance.bin"); - void* batch_normalization_19_variance = readTrainedWeights(batch_normalization_19_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_10_w_path = dir_prefix + std::string("depthwise_conv2d_10_w.bin"); - void* depthwise_conv2d_10_w = readTrainedWeights(depthwise_conv2d_10_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_20_gamma_path = dir_prefix + std::string("batch_normalization_20_gamma.bin"); - void* batch_normalization_20_gamma = readTrainedWeights(batch_normalization_20_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_beta_path = dir_prefix + std::string("batch_normalization_20_beta.bin"); - void* batch_normalization_20_beta = readTrainedWeights(batch_normalization_20_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_mean_path = dir_prefix + std::string("batch_normalization_20_mean.bin"); - void* batch_normalization_20_mean = readTrainedWeights(batch_normalization_20_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_20_variance_path = dir_prefix + std::string("batch_normalization_20_variance.bin"); - void* batch_normalization_20_variance = readTrainedWeights(batch_normalization_20_variance_path.c_str(), 0,1,512,1,1); - std::string 
conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_21_gamma_path = dir_prefix + std::string("batch_normalization_21_gamma.bin"); - void* batch_normalization_21_gamma = readTrainedWeights(batch_normalization_21_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_beta_path = dir_prefix + std::string("batch_normalization_21_beta.bin"); - void* batch_normalization_21_beta = readTrainedWeights(batch_normalization_21_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_mean_path = dir_prefix + std::string("batch_normalization_21_mean.bin"); - void* batch_normalization_21_mean = readTrainedWeights(batch_normalization_21_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_21_variance_path = dir_prefix + std::string("batch_normalization_21_variance.bin"); - void* batch_normalization_21_variance = readTrainedWeights(batch_normalization_21_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_11_w_path = dir_prefix + std::string("depthwise_conv2d_11_w.bin"); - void* depthwise_conv2d_11_w = readTrainedWeights(depthwise_conv2d_11_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_22_gamma_path = dir_prefix + std::string("batch_normalization_22_gamma.bin"); - void* batch_normalization_22_gamma = readTrainedWeights(batch_normalization_22_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_beta_path = dir_prefix + std::string("batch_normalization_22_beta.bin"); - void* batch_normalization_22_beta = readTrainedWeights(batch_normalization_22_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_mean_path = dir_prefix + std::string("batch_normalization_22_mean.bin"); - void* batch_normalization_22_mean = readTrainedWeights(batch_normalization_22_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_22_variance_path = dir_prefix + std::string("batch_normalization_22_variance.bin"); - void* batch_normalization_22_variance = readTrainedWeights(batch_normalization_22_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,1,1); - std::string batch_normalization_23_gamma_path = dir_prefix + std::string("batch_normalization_23_gamma.bin"); - void* batch_normalization_23_gamma = readTrainedWeights(batch_normalization_23_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_beta_path = dir_prefix + std::string("batch_normalization_23_beta.bin"); - void* batch_normalization_23_beta = readTrainedWeights(batch_normalization_23_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_mean_path = dir_prefix + std::string("batch_normalization_23_mean.bin"); - void* batch_normalization_23_mean = readTrainedWeights(batch_normalization_23_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_23_variance_path = dir_prefix + std::string("batch_normalization_23_variance.bin"); - void* batch_normalization_23_variance = readTrainedWeights(batch_normalization_23_variance_path.c_str(), 0,1,512,1,1); - std::string depthwise_conv2d_12_w_path = dir_prefix + std::string("depthwise_conv2d_12_w.bin"); - void* depthwise_conv2d_12_w = readTrainedWeights(depthwise_conv2d_12_w_path.c_str(), 0,512,1,3,3); - std::string batch_normalization_24_gamma_path = dir_prefix + std::string("batch_normalization_24_gamma.bin"); 
- void* batch_normalization_24_gamma = readTrainedWeights(batch_normalization_24_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_beta_path = dir_prefix + std::string("batch_normalization_24_beta.bin"); - void* batch_normalization_24_beta = readTrainedWeights(batch_normalization_24_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_mean_path = dir_prefix + std::string("batch_normalization_24_mean.bin"); - void* batch_normalization_24_mean = readTrainedWeights(batch_normalization_24_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_24_variance_path = dir_prefix + std::string("batch_normalization_24_variance.bin"); - void* batch_normalization_24_variance = readTrainedWeights(batch_normalization_24_variance_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,1024,512,1,1); - std::string batch_normalization_25_gamma_path = dir_prefix + std::string("batch_normalization_25_gamma.bin"); - void* batch_normalization_25_gamma = readTrainedWeights(batch_normalization_25_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_beta_path = dir_prefix + std::string("batch_normalization_25_beta.bin"); - void* batch_normalization_25_beta = readTrainedWeights(batch_normalization_25_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_mean_path = dir_prefix + std::string("batch_normalization_25_mean.bin"); - void* batch_normalization_25_mean = readTrainedWeights(batch_normalization_25_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_25_variance_path = dir_prefix + std::string("batch_normalization_25_variance.bin"); - void* batch_normalization_25_variance = readTrainedWeights(batch_normalization_25_variance_path.c_str(), 0,1,1024,1,1); - std::string depthwise_conv2d_13_w_path = dir_prefix + std::string("depthwise_conv2d_13_w.bin"); - void* depthwise_conv2d_13_w = readTrainedWeights(depthwise_conv2d_13_w_path.c_str(), 0,1024,1,3,3); - std::string batch_normalization_26_gamma_path = dir_prefix + std::string("batch_normalization_26_gamma.bin"); - void* batch_normalization_26_gamma = readTrainedWeights(batch_normalization_26_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_beta_path = dir_prefix + std::string("batch_normalization_26_beta.bin"); - void* batch_normalization_26_beta = readTrainedWeights(batch_normalization_26_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_mean_path = dir_prefix + std::string("batch_normalization_26_mean.bin"); - void* batch_normalization_26_mean = readTrainedWeights(batch_normalization_26_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_26_variance_path = dir_prefix + std::string("batch_normalization_26_variance.bin"); - void* batch_normalization_26_variance = readTrainedWeights(batch_normalization_26_variance_path.c_str(), 0,1,1024,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,1024,1024,1,1); - std::string batch_normalization_27_gamma_path = dir_prefix + std::string("batch_normalization_27_gamma.bin"); - void* batch_normalization_27_gamma = readTrainedWeights(batch_normalization_27_gamma_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_beta_path = dir_prefix + std::string("batch_normalization_27_beta.bin"); - void* batch_normalization_27_beta = 
readTrainedWeights(batch_normalization_27_beta_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_mean_path = dir_prefix + std::string("batch_normalization_27_mean.bin"); - void* batch_normalization_27_mean = readTrainedWeights(batch_normalization_27_mean_path.c_str(), 0,1,1024,1,1); - std::string batch_normalization_27_variance_path = dir_prefix + std::string("batch_normalization_27_variance.bin"); - void* batch_normalization_27_variance = readTrainedWeights(batch_normalization_27_variance_path.c_str(), 0,1,1024,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,1024,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 3000; - int batch_size = 1000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = tensorConvolution(var_5, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = tensorConvolution(var_11, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = tensorConvolution(var_17, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_22 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_23 = tensorBatchNorm(var_22, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_24 = tensorRelu(var_23); - void* var_25 = tensorConvolution(var_24, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_26 = tensorBatchNorm(var_25, 
batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_27 = tensorRelu(var_26); - void* var_28 = tensorConvolution(var_27, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_29 = tensorBatchNorm(var_28, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_30 = tensorRelu(var_29); - void* var_31 = tensorConvolution(var_30, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_32 = tensorBatchNorm(var_31, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_33 = tensorRelu(var_32); - void* var_35 = tensorConvolution(var_33, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_36 = tensorBatchNorm(var_35, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_37 = tensorRelu(var_36); - void* var_38 = tensorConvolution(var_37, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_39 = tensorBatchNorm(var_38, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_40 = tensorRelu(var_39); - void* var_41 = tensorConvolution(var_40, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_47 = tensorConvolution(var_46, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_48 = tensorBatchNorm(var_47, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_49 = tensorRelu(var_48); - void* var_50 = tensorConvolution(var_49, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_51 = tensorBatchNorm(var_50, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvolution(var_52, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_55 = tensorBatchNorm(var_54, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_58 = tensorBatchNorm(var_57, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_60 = tensorConvolution(var_59, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_61 = tensorBatchNorm(var_60, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_62 = tensorRelu(var_61); - void* var_63 = tensorConvolution(var_62, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, 
batch_normalization_21_variance, 0.001); - void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_71 = tensorRelu(var_70); - void* var_73 = tensorConvolution(var_71, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_75 = tensorRelu(var_74); - void* var_76 = tensorConvolution(var_75, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_78 = tensorRelu(var_77); - void* var_79 = tensorConvolution(var_78, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_81 = tensorRelu(var_80); - void* var_82 = tensorConvolution(var_81, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_84 = tensorRelu(var_83); - void* var_86 = tensorPooling(var_84,1,2,2,0,0,2,2); - void* var_88 = tensorGemmGPU(var_86, dense_1_w); - void* var_89 = tensorAdd(var_88, dense_1_b); - void* var_90 = tensorSoftmax(var_89); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_90); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc deleted file mode 100644 index ee2c51b2399505d3a98b54920d9700dbd0548b86..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/mobilenet_shallow2.cc +++ /dev/null @@ -1,231 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/mobilenet_shallow2/"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,3,3,3); - std::string batch_normalization_1_gamma_path = dir_prefix + std::string("batch_normalization_1_gamma.bin"); - void* batch_normalization_1_gamma = 
readTrainedWeights(batch_normalization_1_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_beta_path = dir_prefix + std::string("batch_normalization_1_beta.bin"); - void* batch_normalization_1_beta = readTrainedWeights(batch_normalization_1_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_mean_path = dir_prefix + std::string("batch_normalization_1_mean.bin"); - void* batch_normalization_1_mean = readTrainedWeights(batch_normalization_1_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_1_variance_path = dir_prefix + std::string("batch_normalization_1_variance.bin"); - void* batch_normalization_1_variance = readTrainedWeights(batch_normalization_1_variance_path.c_str(), 0,1,32,1,1); - std::string depthwise_conv2d_1_w_path = dir_prefix + std::string("depthwise_conv2d_1_w.bin"); - void* depthwise_conv2d_1_w = readTrainedWeights(depthwise_conv2d_1_w_path.c_str(), 0,32,1,3,3); - std::string batch_normalization_2_gamma_path = dir_prefix + std::string("batch_normalization_2_gamma.bin"); - void* batch_normalization_2_gamma = readTrainedWeights(batch_normalization_2_gamma_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_beta_path = dir_prefix + std::string("batch_normalization_2_beta.bin"); - void* batch_normalization_2_beta = readTrainedWeights(batch_normalization_2_beta_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_mean_path = dir_prefix + std::string("batch_normalization_2_mean.bin"); - void* batch_normalization_2_mean = readTrainedWeights(batch_normalization_2_mean_path.c_str(), 0,1,32,1,1); - std::string batch_normalization_2_variance_path = dir_prefix + std::string("batch_normalization_2_variance.bin"); - void* batch_normalization_2_variance = readTrainedWeights(batch_normalization_2_variance_path.c_str(), 0,1,32,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,1,1); - std::string batch_normalization_3_gamma_path = dir_prefix + std::string("batch_normalization_3_gamma.bin"); - void* batch_normalization_3_gamma = readTrainedWeights(batch_normalization_3_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_beta_path = dir_prefix + std::string("batch_normalization_3_beta.bin"); - void* batch_normalization_3_beta = readTrainedWeights(batch_normalization_3_beta_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_mean_path = dir_prefix + std::string("batch_normalization_3_mean.bin"); - void* batch_normalization_3_mean = readTrainedWeights(batch_normalization_3_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_3_variance_path = dir_prefix + std::string("batch_normalization_3_variance.bin"); - void* batch_normalization_3_variance = readTrainedWeights(batch_normalization_3_variance_path.c_str(), 0,1,64,1,1); - std::string depthwise_conv2d_2_w_path = dir_prefix + std::string("depthwise_conv2d_2_w.bin"); - void* depthwise_conv2d_2_w = readTrainedWeights(depthwise_conv2d_2_w_path.c_str(), 0,64,1,3,3); - std::string batch_normalization_4_gamma_path = dir_prefix + std::string("batch_normalization_4_gamma.bin"); - void* batch_normalization_4_gamma = readTrainedWeights(batch_normalization_4_gamma_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_beta_path = dir_prefix + std::string("batch_normalization_4_beta.bin"); - void* batch_normalization_4_beta = readTrainedWeights(batch_normalization_4_beta_path.c_str(), 0,1,64,1,1); - std::string 
batch_normalization_4_mean_path = dir_prefix + std::string("batch_normalization_4_mean.bin"); - void* batch_normalization_4_mean = readTrainedWeights(batch_normalization_4_mean_path.c_str(), 0,1,64,1,1); - std::string batch_normalization_4_variance_path = dir_prefix + std::string("batch_normalization_4_variance.bin"); - void* batch_normalization_4_variance = readTrainedWeights(batch_normalization_4_variance_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,1,1); - std::string batch_normalization_5_gamma_path = dir_prefix + std::string("batch_normalization_5_gamma.bin"); - void* batch_normalization_5_gamma = readTrainedWeights(batch_normalization_5_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_beta_path = dir_prefix + std::string("batch_normalization_5_beta.bin"); - void* batch_normalization_5_beta = readTrainedWeights(batch_normalization_5_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_mean_path = dir_prefix + std::string("batch_normalization_5_mean.bin"); - void* batch_normalization_5_mean = readTrainedWeights(batch_normalization_5_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_5_variance_path = dir_prefix + std::string("batch_normalization_5_variance.bin"); - void* batch_normalization_5_variance = readTrainedWeights(batch_normalization_5_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_3_w_path = dir_prefix + std::string("depthwise_conv2d_3_w.bin"); - void* depthwise_conv2d_3_w = readTrainedWeights(depthwise_conv2d_3_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_6_gamma_path = dir_prefix + std::string("batch_normalization_6_gamma.bin"); - void* batch_normalization_6_gamma = readTrainedWeights(batch_normalization_6_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_beta_path = dir_prefix + std::string("batch_normalization_6_beta.bin"); - void* batch_normalization_6_beta = readTrainedWeights(batch_normalization_6_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_mean_path = dir_prefix + std::string("batch_normalization_6_mean.bin"); - void* batch_normalization_6_mean = readTrainedWeights(batch_normalization_6_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_6_variance_path = dir_prefix + std::string("batch_normalization_6_variance.bin"); - void* batch_normalization_6_variance = readTrainedWeights(batch_normalization_6_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,1,1); - std::string batch_normalization_7_gamma_path = dir_prefix + std::string("batch_normalization_7_gamma.bin"); - void* batch_normalization_7_gamma = readTrainedWeights(batch_normalization_7_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_beta_path = dir_prefix + std::string("batch_normalization_7_beta.bin"); - void* batch_normalization_7_beta = readTrainedWeights(batch_normalization_7_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_mean_path = dir_prefix + std::string("batch_normalization_7_mean.bin"); - void* batch_normalization_7_mean = readTrainedWeights(batch_normalization_7_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_7_variance_path = dir_prefix + std::string("batch_normalization_7_variance.bin"); - void* batch_normalization_7_variance 
= readTrainedWeights(batch_normalization_7_variance_path.c_str(), 0,1,128,1,1); - std::string depthwise_conv2d_4_w_path = dir_prefix + std::string("depthwise_conv2d_4_w.bin"); - void* depthwise_conv2d_4_w = readTrainedWeights(depthwise_conv2d_4_w_path.c_str(), 0,128,1,3,3); - std::string batch_normalization_8_gamma_path = dir_prefix + std::string("batch_normalization_8_gamma.bin"); - void* batch_normalization_8_gamma = readTrainedWeights(batch_normalization_8_gamma_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_beta_path = dir_prefix + std::string("batch_normalization_8_beta.bin"); - void* batch_normalization_8_beta = readTrainedWeights(batch_normalization_8_beta_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_mean_path = dir_prefix + std::string("batch_normalization_8_mean.bin"); - void* batch_normalization_8_mean = readTrainedWeights(batch_normalization_8_mean_path.c_str(), 0,1,128,1,1); - std::string batch_normalization_8_variance_path = dir_prefix + std::string("batch_normalization_8_variance.bin"); - void* batch_normalization_8_variance = readTrainedWeights(batch_normalization_8_variance_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,1,1); - std::string batch_normalization_9_gamma_path = dir_prefix + std::string("batch_normalization_9_gamma.bin"); - void* batch_normalization_9_gamma = readTrainedWeights(batch_normalization_9_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_beta_path = dir_prefix + std::string("batch_normalization_9_beta.bin"); - void* batch_normalization_9_beta = readTrainedWeights(batch_normalization_9_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_mean_path = dir_prefix + std::string("batch_normalization_9_mean.bin"); - void* batch_normalization_9_mean = readTrainedWeights(batch_normalization_9_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_9_variance_path = dir_prefix + std::string("batch_normalization_9_variance.bin"); - void* batch_normalization_9_variance = readTrainedWeights(batch_normalization_9_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_5_w_path = dir_prefix + std::string("depthwise_conv2d_5_w.bin"); - void* depthwise_conv2d_5_w = readTrainedWeights(depthwise_conv2d_5_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_10_gamma_path = dir_prefix + std::string("batch_normalization_10_gamma.bin"); - void* batch_normalization_10_gamma = readTrainedWeights(batch_normalization_10_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_beta_path = dir_prefix + std::string("batch_normalization_10_beta.bin"); - void* batch_normalization_10_beta = readTrainedWeights(batch_normalization_10_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_mean_path = dir_prefix + std::string("batch_normalization_10_mean.bin"); - void* batch_normalization_10_mean = readTrainedWeights(batch_normalization_10_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_10_variance_path = dir_prefix + std::string("batch_normalization_10_variance.bin"); - void* batch_normalization_10_variance = readTrainedWeights(batch_normalization_10_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,1,1); - std::string batch_normalization_11_gamma_path = dir_prefix + 
std::string("batch_normalization_11_gamma.bin"); - void* batch_normalization_11_gamma = readTrainedWeights(batch_normalization_11_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_beta_path = dir_prefix + std::string("batch_normalization_11_beta.bin"); - void* batch_normalization_11_beta = readTrainedWeights(batch_normalization_11_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_mean_path = dir_prefix + std::string("batch_normalization_11_mean.bin"); - void* batch_normalization_11_mean = readTrainedWeights(batch_normalization_11_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_11_variance_path = dir_prefix + std::string("batch_normalization_11_variance.bin"); - void* batch_normalization_11_variance = readTrainedWeights(batch_normalization_11_variance_path.c_str(), 0,1,256,1,1); - std::string depthwise_conv2d_6_w_path = dir_prefix + std::string("depthwise_conv2d_6_w.bin"); - void* depthwise_conv2d_6_w = readTrainedWeights(depthwise_conv2d_6_w_path.c_str(), 0,256,1,3,3); - std::string batch_normalization_12_gamma_path = dir_prefix + std::string("batch_normalization_12_gamma.bin"); - void* batch_normalization_12_gamma = readTrainedWeights(batch_normalization_12_gamma_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_beta_path = dir_prefix + std::string("batch_normalization_12_beta.bin"); - void* batch_normalization_12_beta = readTrainedWeights(batch_normalization_12_beta_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_mean_path = dir_prefix + std::string("batch_normalization_12_mean.bin"); - void* batch_normalization_12_mean = readTrainedWeights(batch_normalization_12_mean_path.c_str(), 0,1,256,1,1); - std::string batch_normalization_12_variance_path = dir_prefix + std::string("batch_normalization_12_variance.bin"); - void* batch_normalization_12_variance = readTrainedWeights(batch_normalization_12_variance_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,512,256,1,1); - std::string batch_normalization_13_gamma_path = dir_prefix + std::string("batch_normalization_13_gamma.bin"); - void* batch_normalization_13_gamma = readTrainedWeights(batch_normalization_13_gamma_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_beta_path = dir_prefix + std::string("batch_normalization_13_beta.bin"); - void* batch_normalization_13_beta = readTrainedWeights(batch_normalization_13_beta_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_mean_path = dir_prefix + std::string("batch_normalization_13_mean.bin"); - void* batch_normalization_13_mean = readTrainedWeights(batch_normalization_13_mean_path.c_str(), 0,1,512,1,1); - std::string batch_normalization_13_variance_path = dir_prefix + std::string("batch_normalization_13_variance.bin"); - void* batch_normalization_13_variance = readTrainedWeights(batch_normalization_13_variance_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,2048,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * 
batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); - void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvolution(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvolution(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvolution(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvolution(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, 
conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_46 = tensorRelu(var_45); - void* var_47 = tensorPooling(var_46,1,2,2,0,0,2,2); - void* var_49 = tensorGemmGPU(var_47, dense_1_w); - void* var_50 = tensorAdd(var_49, dense_1_b); - void* var_51 = tensorSoftmax(var_50); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_51); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc deleted file mode 100644 index 11cc3a38dd5da9dfcee7dd1181ab7e9a099fef88..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_approx.cc +++ /dev/null @@ -1,221 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string 
conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = 
readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_2 = tensorConvPerf(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - void* var_6 = tensorConvPerf(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_7 = tensorAdd(var_6, conv2d_2_b); - void* var_8 = tensorRelu(var_7); - void* var_10 = tensorConvPerf(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorAdd(var_4, var_11); - void* var_13 = tensorRelu(var_12); - void* var_15 = tensorConvPerf(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorRelu(var_16); - void* var_19 = tensorConvPerf(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_20 = tensorAdd(var_19, conv2d_5_b); - void* var_21 = tensorAdd(var_13, var_20); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvPerf(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_25 = tensorAdd(var_24, conv2d_6_b); - void* var_26 = tensorRelu(var_25); - void* var_28 = tensorConvPerf(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_29 = tensorAdd(var_28, conv2d_7_b); - void* var_30 = tensorAdd(var_22, var_29); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvPerf(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 0, 0); - void* var_34 = tensorAdd(var_33, conv2d_8_b); - void* var_35 = tensorRelu(var_34); - void* var_37 = tensorConvPerf(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_38 = 
tensorAdd(var_37, conv2d_9_b); - void* var_40 = tensorConvPerf(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 0, 0); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorAdd(var_41, var_38); - void* var_43 = tensorRelu(var_42); - void* var_45 = tensorConvPerf(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_46 = tensorAdd(var_45, conv2d_11_b); - void* var_47 = tensorRelu(var_46); - void* var_49 = tensorConvPerf(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorAdd(var_43, var_50); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvPerf(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 1, 0); - void* var_55 = tensorAdd(var_54, conv2d_13_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorConvPerf(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 0, 1); - void* var_59 = tensorAdd(var_58, conv2d_14_b); - void* var_60 = tensorAdd(var_52, var_59); - void* var_61 = tensorRelu(var_60); - void* var_63 = tensorConvPerf(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 0, 0); - void* var_64 = tensorAdd(var_63, conv2d_15_b); - void* var_65 = tensorRelu(var_64); - void* var_67 = tensorConvPerf(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_68 = tensorAdd(var_67, conv2d_16_b); - void* var_70 = tensorConvPerf(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 0, 0); - void* var_71 = tensorAdd(var_70, conv2d_17_b); - void* var_72 = tensorAdd(var_71, var_68); - void* var_73 = tensorRelu(var_72); - void* var_75 = tensorConvPerf(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_76 = tensorAdd(var_75, conv2d_18_b); - void* var_77 = tensorRelu(var_76); - void* var_79 = tensorConvPerf(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_80 = tensorAdd(var_79, conv2d_19_b); - void* var_81 = tensorAdd(var_73, var_80); - void* var_82 = tensorRelu(var_81); - void* var_84 = tensorConvPerf(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_85 = tensorAdd(var_84, conv2d_20_b); - void* var_86 = tensorRelu(var_85); - void* var_88 = tensorConvPerf(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 0, 0); - void* var_89 = tensorAdd(var_88, conv2d_21_b); - void* var_90 = tensorAdd(var_82, var_89); - void* var_91 = tensorRelu(var_90); - void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmGPU(var_92, dense_1_w); - void* var_95 = tensorAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc deleted file mode 100644 index 2e33715e8c6972966e7359a1e7b8fc5069e1f16f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_cudaperf.cc +++ /dev/null @@ -1,221 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = 
std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = 
readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string 
dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_2 = tensorConvPerfCuda(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - void* var_6 = tensorConvPerfCuda(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_7 = tensorAdd(var_6, conv2d_2_b); - void* var_8 = tensorRelu(var_7); - void* var_10 = tensorConvPerfCuda(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorAdd(var_4, var_11); - void* var_13 = tensorRelu(var_12); - void* var_15 = tensorConvPerfCuda(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorRelu(var_16); - void* var_19 = tensorConvPerfCuda(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_20 = tensorAdd(var_19, conv2d_5_b); - void* var_21 = tensorAdd(var_13, var_20); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvPerfCuda(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 3, 1, 2); - void* var_25 = tensorAdd(var_24, conv2d_6_b); - void* var_26 = tensorRelu(var_25); - void* var_28 = tensorConvPerfCuda(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_7_b); - void* var_30 = tensorAdd(var_22, var_29); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvPerfCuda(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 1, 1, 0); - void* var_34 = tensorAdd(var_33, conv2d_8_b); - void* var_35 = tensorRelu(var_34); - void* var_37 = tensorConvPerfCuda(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_38 = tensorAdd(var_37, conv2d_9_b); - void* var_40 = tensorConvPerfCuda(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorAdd(var_41, var_38); - void* var_43 = tensorRelu(var_42); - void* var_45 = tensorConvPerfCuda(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 3, 1, 0); - void* var_46 = tensorAdd(var_45, conv2d_11_b); - void* var_47 = tensorRelu(var_46); - void* var_49 = tensorConvPerfCuda(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorAdd(var_43, var_50); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvPerfCuda(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_55 = tensorAdd(var_54, conv2d_13_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorConvPerfCuda(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 1, 3, 1); - void* var_59 = tensorAdd(var_58, conv2d_14_b); - void* var_60 = tensorAdd(var_52, var_59); - void* var_61 = tensorRelu(var_60); - void* var_63 = tensorConvPerfCuda(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 1, 1, 0); - void* var_64 = tensorAdd(var_63, conv2d_15_b); - void* var_65 = tensorRelu(var_64); - void* var_67 = tensorConvPerfCuda(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_68 = tensorAdd(var_67, conv2d_16_b); - void* var_70 = tensorConvPerfCuda(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 3, 1, 2); - void* var_71 = 
tensorAdd(var_70, conv2d_17_b); - void* var_72 = tensorAdd(var_71, var_68); - void* var_73 = tensorRelu(var_72); - void* var_75 = tensorConvPerfCuda(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_76 = tensorAdd(var_75, conv2d_18_b); - void* var_77 = tensorRelu(var_76); - void* var_79 = tensorConvPerfCuda(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 1, 3, 0); - void* var_80 = tensorAdd(var_79, conv2d_19_b); - void* var_81 = tensorAdd(var_73, var_80); - void* var_82 = tensorRelu(var_81); - void* var_84 = tensorConvPerfCuda(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_85 = tensorAdd(var_84, conv2d_20_b); - void* var_86 = tensorRelu(var_85); - void* var_88 = tensorConvPerfCuda(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 1, 1, 0); - void* var_89 = tensorAdd(var_88, conv2d_21_b); - void* var_90 = tensorAdd(var_82, var_89); - void* var_91 = tensorRelu(var_90); - void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmGPU(var_92, dense_1_w); - void* var_95 = tensorAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc deleted file mode 100644 index f7c6593d525351085ee99606bc90fc1419980d8e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_half.cc +++ /dev/null @@ -1,194 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(1); - - int batch_size = 4000; - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = 
readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + 
std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - void* var_2 = tensorHalfConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_3 = tensorHalfAdd(var_2, conv2d_1_b); - void* var_4 = tensorHalfRelu(var_3); - void* var_6 = tensorHalfConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_7 = tensorHalfAdd(var_6, conv2d_2_b); - void* var_8 = tensorHalfRelu(var_7); - void* var_10 = tensorHalfConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorHalfAdd(var_10, conv2d_3_b); - void* var_12 = tensorHalfAdd(var_4, var_11); - void* var_13 = tensorHalfRelu(var_12); - void* var_15 = tensorHalfConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_16 = tensorHalfAdd(var_15, conv2d_4_b); - void* var_17 = tensorHalfRelu(var_16); - void* var_19 = tensorHalfConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_20 = tensorHalfAdd(var_19, conv2d_5_b); - void* var_21 = tensorHalfAdd(var_13, var_20); - void* var_22 = tensorHalfRelu(var_21); - void* var_24 = tensorHalfConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorHalfAdd(var_24, conv2d_6_b); - void* var_26 = tensorHalfRelu(var_25); - void* var_28 = tensorHalfConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorHalfAdd(var_28, 
conv2d_7_b); - void* var_30 = tensorHalfAdd(var_22, var_29); - void* var_31 = tensorHalfRelu(var_30); - void* var_33 = tensorHalfConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); - void* var_34 = tensorHalfAdd(var_33, conv2d_8_b); - void* var_35 = tensorHalfRelu(var_34); - void* var_37 = tensorHalfConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_38 = tensorHalfAdd(var_37, conv2d_9_b); - void* var_40 = tensorHalfConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); - void* var_41 = tensorHalfAdd(var_40, conv2d_10_b); - void* var_42 = tensorHalfAdd(var_41, var_38); - void* var_43 = tensorHalfRelu(var_42); - void* var_45 = tensorHalfConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_46 = tensorHalfAdd(var_45, conv2d_11_b); - void* var_47 = tensorHalfRelu(var_46); - void* var_49 = tensorHalfConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_50 = tensorHalfAdd(var_49, conv2d_12_b); - void* var_51 = tensorHalfAdd(var_43, var_50); - void* var_52 = tensorHalfRelu(var_51); - void* var_54 = tensorHalfConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_55 = tensorHalfAdd(var_54, conv2d_13_b); - void* var_56 = tensorHalfRelu(var_55); - void* var_58 = tensorHalfConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); - void* var_59 = tensorHalfAdd(var_58, conv2d_14_b); - void* var_60 = tensorHalfAdd(var_52, var_59); - void* var_61 = tensorHalfRelu(var_60); - void* var_63 = tensorHalfConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); - void* var_64 = tensorHalfAdd(var_63, conv2d_15_b); - void* var_65 = tensorHalfRelu(var_64); - void* var_67 = tensorHalfConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); - void* var_68 = tensorHalfAdd(var_67, conv2d_16_b); - void* var_70 = tensorHalfConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); - void* var_71 = tensorHalfAdd(var_70, conv2d_17_b); - void* var_72 = tensorHalfAdd(var_71, var_68); - void* var_73 = tensorHalfRelu(var_72); - void* var_75 = tensorHalfConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); - void* var_76 = tensorHalfAdd(var_75, conv2d_18_b); - void* var_77 = tensorHalfRelu(var_76); - void* var_79 = tensorHalfConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); - void* var_80 = tensorHalfAdd(var_79, conv2d_19_b); - void* var_81 = tensorHalfAdd(var_73, var_80); - void* var_82 = tensorHalfRelu(var_81); - void* var_84 = tensorHalfConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); - void* var_85 = tensorHalfAdd(var_84, conv2d_20_b); - void* var_86 = tensorHalfRelu(var_85); - void* var_88 = tensorHalfConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); - void* var_89 = tensorHalfAdd(var_88, conv2d_21_b); - void* var_90 = tensorHalfAdd(var_82, var_89); - void* var_91 = tensorHalfRelu(var_90); - void* var_92 = tensorHalfPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorHalfGemmGPU(var_92, dense_1_w); - void* var_95 = tensorHalfAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - computeAccuracy2(labels, batch_size,var_96); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc deleted file mode 100644 index 6634ce92c9aed0fbcc32e68580fb3171145ee297..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/resnet18_cifar10_inputapprox.cc +++ /dev/null @@ -1,221 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> 
-#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_3/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0, batch_size,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(), batch_size); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + 
std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = 
readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2000; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_2 = tensorConvolutionKernelSamp(input, conv2d_1_w, 1, 1, 1, 1, 1, 0, 30); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - void* var_6 = tensorConvolutionKernelSamp(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0, 30); - void* var_7 = tensorAdd(var_6, conv2d_2_b); - void* var_8 = tensorRelu(var_7); - void* var_10 = tensorConvolutionKernelSamp(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0, 30); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorAdd(var_4, var_11); - void* var_13 = tensorRelu(var_12); - void* var_15 = tensorConvolutionKernelSamp(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0, 30); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorRelu(var_16); - void* var_19 = tensorConvolutionKernelSamp(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0, 30); - void* var_20 = tensorAdd(var_19, conv2d_5_b); - void* var_21 = tensorAdd(var_13, var_20); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolutionKernelSamp(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0, 4); - void* var_25 = tensorAdd(var_24, conv2d_6_b); - void* var_26 = tensorRelu(var_25); - void* var_28 = tensorConvolutionKernelSamp(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0, 4); - void* var_29 = tensorAdd(var_28, conv2d_7_b); - void* var_30 = tensorAdd(var_22, var_29); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolutionKernelSamp(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0, 30); - void* var_34 = tensorAdd(var_33, conv2d_8_b); - void* var_35 = tensorRelu(var_34); - void* var_37 = tensorConvolutionKernelSamp(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0, 30); - void* var_38 = tensorAdd(var_37, conv2d_9_b); - void* var_40 = tensorConvolutionKernelSamp(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0, 30); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorAdd(var_41, var_38); - void* var_43 = tensorRelu(var_42); - void* var_45 = tensorConvolutionKernelSamp(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0, 4); - void* var_46 = tensorAdd(var_45, conv2d_11_b); - void* var_47 = tensorRelu(var_46); - void* var_49 = tensorConvolutionKernelSamp(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0, 30); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorAdd(var_43, var_50); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvolutionKernelSamp(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0, 30); - void* var_55 = tensorAdd(var_54, conv2d_13_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorConvolutionKernelSamp(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0, 30); - void* var_59 = tensorAdd(var_58, conv2d_14_b); - void* var_60 = tensorAdd(var_52, var_59); - void* var_61 = tensorRelu(var_60); - void* var_63 = tensorConvolutionKernelSamp(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0, 30); - void* var_64 = tensorAdd(var_63, conv2d_15_b); - void* 
var_65 = tensorRelu(var_64); - void* var_67 = tensorConvolutionKernelSamp(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0, 30); - void* var_68 = tensorAdd(var_67, conv2d_16_b); - void* var_70 = tensorConvolutionKernelSamp(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0, 30); - void* var_71 = tensorAdd(var_70, conv2d_17_b); - void* var_72 = tensorAdd(var_71, var_68); - void* var_73 = tensorRelu(var_72); - void* var_75 = tensorConvolutionKernelSamp(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0, 30); - void* var_76 = tensorAdd(var_75, conv2d_18_b); - void* var_77 = tensorRelu(var_76); - void* var_79 = tensorConvolutionKernelSamp(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0, 30); - void* var_80 = tensorAdd(var_79, conv2d_19_b); - void* var_81 = tensorAdd(var_73, var_80); - void* var_82 = tensorRelu(var_81); - void* var_84 = tensorConvolutionKernelSamp(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0, 30); - void* var_85 = tensorAdd(var_84, conv2d_20_b); - void* var_86 = tensorRelu(var_85); - void* var_88 = tensorConvolutionKernelSamp(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0, 30); - void* var_89 = tensorAdd(var_88, conv2d_21_b); - void* var_90 = tensorAdd(var_82, var_89); - void* var_91 = tensorRelu(var_90); - void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmGPU(var_92, dense_1_w); - void* var_95 = tensorAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc deleted file mode 100644 index a8129a1e459a15e26f595972724451e01d81b0a1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_alexnet.cc +++ /dev/null @@ -1,480 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem - -/*void testAlexnet2(){ - - struct Tensor* input = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 100, 3, 224, 224); - initTensorValues(input); - - struct Tensor* conv1filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 96, 3, 11, 11); - printTensorDims(conv1filter); - - /****** Start of Layer 1 *************** - - // NOTE: Padding for same conv is computed as P = (F - 1 /2) - struct Tensor* conv1out = tensorConvolution(input, conv1filter, 5, 5, 4, 4); - printTensorDims(conv1out); - - struct Tensor* conv1bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 96, 1, 1); - struct Tensor* conv1bias_out = tensorAdd(conv1out, conv1bias); - - struct Tensor* relu1out = tensorRelu(conv1bias_out); - - // NOTE: These parameters are a deviation from the original paper - // The parameters match the alexnet TF model - // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal - unsigned int LRN_window = 5; - double LRN_alpha = 2e-05; - double LRN_beta = 0.75; - double LRN_k = 1.0; - struct Tensor* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k); - printTensorDims(lrn1out); - - struct Tensor* maxpool1out = 
tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2); - - /****** End of Conv Layer 1 ********** - - struct Tensor** splits = tensorSplit(maxpool1out, 2, 1); - - struct Tensor* conv2W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 48, 5, 5); - struct Tensor** conv2fils = tensorSplit(conv2W, 2, 0); - - struct Tensor* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1); - printTensorDims(conv2a_out); - - struct Tensor* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1); - printTensorDims(conv2b_out); - - struct Tensor* conv2_outs[2]; - conv2_outs[0] = conv2a_out; - conv2_outs[1] = conv2b_out; - - struct Tensor* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); - printTensorDims(conv2_concat_out); - - struct Tensor* conv2bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1); - struct Tensor* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); - struct Tensor* relu2out = tensorRelu(conv2bias_out); - struct Tensor* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k); - printTensorDims(lrn2out); - - struct Tensor* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2); - printTensorDims(maxpool2out); - - /******** End of Conv Layer 2 ************ - - struct Tensor* conv3filter = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 256, 3, 3); - struct Tensor* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1); - - struct Tensor* conv3bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1); - struct Tensor* conv3bias_out = tensorAdd(conv3_out, conv3bias); - struct Tensor* relu3out = tensorRelu(conv3bias_out); - printTensorDims(relu3out); - - /********* End of Conv layer 3 ****** - - struct Tensor** splits2 = tensorSplit(relu3out, 2, 1); - - struct Tensor* conv4W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 384, 192, 3, 3); - struct Tensor** conv4fils = tensorSplit(conv4W, 2, 0); - - printTensorDims(splits2[0]); - printTensorDims(conv4fils[0]); - - struct Tensor* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1); - printTensorDims(conv4a_out); - - struct Tensor* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1); - printTensorDims(conv4b_out); - - struct Tensor* conv4_outs[2]; - conv4_outs[0] = conv4a_out; - conv4_outs[1] = conv4b_out; - - struct Tensor* conv4_concat_out = tensorConcat(conv4_outs, 2, 1); - printTensorDims(conv4_concat_out); - - struct Tensor* conv4bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 384, 1, 1); - struct Tensor* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); - struct Tensor* relu4out = tensorRelu(conv4bias_out); - printTensorDims(relu4out); - - /********* End of Conv layer 4 ****** - - struct Tensor** splits3 = tensorSplit(relu4out, 2, 1); - - struct Tensor* conv5W = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 256, 192, 3, 3); - struct Tensor** conv5fils = tensorSplit(conv5W, 2, 0); - - printTensorDims(splits3[0]); - printTensorDims(conv5fils[0]); - - struct Tensor* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1); - printTensorDims(conv5a_out); - - struct Tensor* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1); - printTensorDims(conv5b_out); - - struct Tensor* conv5_outs[2]; - conv5_outs[0] = conv5a_out; - conv5_outs[1] = conv5b_out; - - struct Tensor* conv5_concat_out = tensorConcat(conv5_outs, 2, 1); - printTensorDims(conv5_concat_out); - - struct Tensor* conv5bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 1, 256, 1, 1); - 
struct Tensor* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); - struct Tensor* relu5out = tensorRelu(conv5bias_out); - printTensorDims(relu5out); - - struct Tensor* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2); - printTensorDims(maxpool5out); - - /********* End of Conv layer 5 ****** - - struct Tensor* fc1_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 256*6*6, 4096); - struct Tensor* gemm1out = tensorGemm(maxpool5out, fc1_weights); - printTensorDims(gemm1out); - - struct Tensor* bias = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 1, 4096); - struct Tensor* gemm1biasout = tensorGemmBias(gemm1out, bias); - printTensorDims(gemm1biasout); - - struct Tensor* relu6out = tensorRelu(gemm1biasout); - printTensorDims(relu6out); - - /***** End of FC1 layer ******** - - struct Tensor* fc2_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 4096, 4096); - struct Tensor* gemm2out = tensorGemm(relu6out, fc2_weights); - printTensorDims(gemm2out); - - struct Tensor* bias2 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 1, 4096); - struct Tensor* gemm2biasout = tensorGemmBias(gemm2out, bias2); - printTensorDims(gemm2biasout); - - struct Tensor* relu7out = tensorRelu(gemm2biasout); - printTensorDims(relu7out); - - /***** End of FC2 layer ******** - - struct Tensor* fc3_weights = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 4096, 1000); - struct Tensor* gemm3out = tensorGemm(relu7out, fc3_weights); - printTensorDims(gemm3out); - - struct Tensor* bias3 = create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - 1, 1, 1, 1000); - struct Tensor* gemm3biasout = tensorGemmBias(gemm3out, bias3); - printTensorDims(gemm3biasout); - - /******** End of FC3 Layer ********** - struct Tensor* result = tensorSoftmax(gemm3biasout); - printTensorDims(result); - -} */ - - - -void printLikelihood(char* labels_file, int num_labels, void* result_ptr){ - - struct Tensor* result = (struct Tensor*) result_ptr; - - size_t batch_dim = result->dims.dim_sizes[0]; - size_t channels = result->dims.dim_sizes[1]; - float* data = (float*) result->host_data; - - for(int i = 0; i < batch_dim; i++){ - int chosen = 0; - for (int id = 1; id < channels; ++id){ - if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; - } - - printf("** chosen = %d, label = %f, label+3 = %f \n", - chosen, data[chosen], data[chosen+3]); - } - - //float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; - //printf("****** Accuracy = %f \n\n", accuracy); -} - - -//--- Results not matching -// *** CHECK: -// 1) cudnnCrossCorrelation vs cudnnConvolution -// 2) Weights -// 3) Tensor outputs -// 4) Data layouts - - - -/*** NOTE: REFERECNCE ARCHITECTURE **/ -// FIXIT: Data allocations may need to organized - Alexnet may consume more than available mem -void testAlexnet3(){ - - int test_batch_size = 2; - int conv_mode = 1; // CROSS_CORRELATION matches the TF conv2d implementation - int conv_precision = 0; // floating point precision for convolution - - printf("****** AlexNet Architecture 3 ******** \n\n"); - void* input = readTrainedWeights("../alexnet/params/combined_imgs.bin", - CUDNN_DATA_FLOAT, test_batch_size, 3, 227, 227); - dumpWeightsToFile("tensors_out/input.out", input); - - /****** Start of Layer 1 ****************/ - void* conv1filter = readTrainedWeights("../alexnet/params/conv1.bin", - CUDNN_DATA_FLOAT, 96, 3, 11, 11); - - printTensorDims(conv1filter); - dumpWeightsToFile("tensors_out/conv1filter.out", conv1filter); - - 
// NOTE: the trained model does NOT have any padding in this conv - void* conv1out = tensorConvolution(input, conv1filter, 4, 4, 4, 4, - conv_mode, conv_precision); - printTensorDims(conv1out); - - void* conv1bias = readTrainedWeights("../alexnet/params/conv1.bias.bin", - CUDNN_DATA_FLOAT, 1, 96, 1, 1); - void* conv1bias_out = tensorAdd(conv1out, conv1bias); - - dumpWeightsToFile("tensors_out/conv1_init.out", conv1out); - - void* relu1out = tensorRelu(conv1bias_out); - printTensorDims(relu1out); - dumpWeightsToFile("tensors_out/conv1.out", relu1out); - - // NOTE: These parameters are a deviation from the original paper - // The parameters match the alexnet TF model - // TODO: Try removing LRN and measure - seems like impact of LRN may be minimal - unsigned int LRN_window = 5; - double LRN_alpha = 2e-05 * LRN_window; - double LRN_beta = 0.75; - double LRN_k = 1.0; - - // TEST-point - Compare TF vs CUDNN - void* lrn1out = tensorLRN(relu1out, LRN_window, LRN_alpha, LRN_beta, LRN_k); - printTensorDims(lrn1out); - dumpWeightsToFile("tensors_out/lrn1.out", lrn1out); - - void* maxpool1out = tensorPooling(lrn1out, 0, 3, 3, 0, 0, 2, 2); - printTensorDims(maxpool1out); - dumpWeightsToFile("tensors_out/maxpool1.out", maxpool1out); - - /****** End of Conv Layer 1 ***********/ - - // TEST-point - void** splits = tensorSplit(maxpool1out, 2, 1); - - void* concat_test1 = tensorConcat(splits, 2, 1); - compareTensors(maxpool1out, concat_test1); - - void* conv2W = readTrainedWeights("../alexnet/params/conv2.bin", - CUDNN_DATA_FLOAT, 256, 48, 5, 5); - - dumpWeightsToFile("tensors_out/conv2filter.out", conv2W); - - // TEST point - compare split convolution across TF vs cuDNN - void** conv2fils = tensorSplit(conv2W, 2, 0); - - void* concat_test2 = tensorConcat(conv2fils, 2, 0); - compareTensors(conv2W, concat_test2); - - // NOTE: Padding for same conv is computed as P = ((F - 1) / 2) - void* conv2a_out = tensorConvolution(splits[0], conv2fils[0], 2, 2, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv2a_out); - - void* conv2b_out = tensorConvolution(splits[1], conv2fils[1], 2, 2, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv2b_out); - - void* conv2_outs[2]; - conv2_outs[0] = conv2a_out; - conv2_outs[1] = conv2b_out; - - // Test point - void* conv2_concat_out = tensorConcat(conv2_outs, 2, 1); - printTensorDims(conv2_concat_out); - dumpWeightsToFile("tensors_out/conv2_init.out", conv2_concat_out); - - void* conv2bias = readTrainedWeights("../alexnet/params/conv2.bias.bin", - CUDNN_DATA_FLOAT, 1, 256, 1, 1); - void* conv2bias_out = tensorAdd(conv2_concat_out, conv2bias); - printTensorDims(conv2bias_out); - - dumpWeightsToFile("tensors_out/conv2_bias_init.out", conv2bias_out); - - void* relu2out = tensorRelu(conv2bias_out); - dumpWeightsToFile("tensors_out/conv2.out", relu2out); - printTensorDims(relu2out); - - void* lrn2out = tensorLRN(relu2out, LRN_window, LRN_alpha, LRN_beta, LRN_k); - printTensorDims(lrn2out); - - void* maxpool2out = tensorPooling(lrn2out, 0, 3, 3, 0, 0, 2, 2); - printTensorDims(maxpool2out); - - /******** End of Conv Layer 2 *************/ - - void* conv3filter = readTrainedWeights("../alexnet/params/conv3.bin", - CUDNN_DATA_FLOAT, 384, 256, 3, 3); - void* conv3_out = tensorConvolution(maxpool2out, conv3filter, 1, 1, 1, 1, - conv_mode, conv_precision); - - void* conv3bias = readTrainedWeights("../alexnet/params/conv3.bias.bin", - CUDNN_DATA_FLOAT, 1, 384, 1, 1); - void* conv3bias_out = tensorAdd(conv3_out, conv3bias); - void* relu3out = tensorRelu(conv3bias_out); - 
dumpWeightsToFile("tensors_out/conv3.out", relu3out); - printTensorDims(relu3out); - - /********* End of Conv layer 3 *******/ - - void** splits2 = tensorSplit(relu3out, 2, 1); - - void* conv4W = readTrainedWeights("../alexnet/params/conv4.bin", - CUDNN_DATA_FLOAT, 384, 192, 3, 3); - void** conv4fils = tensorSplit(conv4W, 2, 0); - - printTensorDims(splits2[0]); - printTensorDims(conv4fils[0]); - - // Test-point DOES the pairing of splits and filters make sense? - void* conv4a_out = tensorConvolution(splits2[0], conv4fils[0], 1, 1, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv4a_out); - - void* conv4b_out = tensorConvolution(splits2[1], conv4fils[1], 1, 1, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv4b_out); - - void* conv4_outs[2]; - conv4_outs[0] = conv4a_out; - conv4_outs[1] = conv4b_out; - - void* conv4_concat_out = tensorConcat(conv4_outs, 2, 1); - printTensorDims(conv4_concat_out); - - void* conv4bias = readTrainedWeights("../alexnet/params/conv4.bias.bin", - CUDNN_DATA_FLOAT, 1, 384, 1, 1); - void* conv4bias_out = tensorAdd(conv4_concat_out, conv4bias); - - void* relu4out = tensorRelu(conv4bias_out); - printTensorDims(relu4out); - - /********* End of Conv layer 4 *******/ - - void** splits3 = tensorSplit(relu4out, 2, 1); - - void* conv5W = readTrainedWeights("../alexnet/params/conv5.bin", - CUDNN_DATA_FLOAT, 256, 192, 3, 3); - void** conv5fils = tensorSplit(conv5W, 2, 0); - - printTensorDims(splits3[0]); - printTensorDims(conv5fils[0]); - - void* conv5a_out = tensorConvolution(splits3[0], conv5fils[0], 1, 1, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv5a_out); - - void* conv5b_out = tensorConvolution(splits3[1], conv5fils[1], 1, 1, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv5b_out); - - void* conv5_outs[2]; - conv5_outs[0] = conv5a_out; - conv5_outs[1] = conv5b_out; - - void* conv5_concat_out = tensorConcat(conv5_outs, 2, 1); - printTensorDims(conv5_concat_out); - - void* conv5bias = readTrainedWeights("../alexnet/params/conv5.bias.bin", - CUDNN_DATA_FLOAT, 1, 256, 1, 1); - void* conv5bias_out = tensorAdd(conv5_concat_out, conv5bias); - void* relu5out = tensorRelu(conv5bias_out); - printTensorDims(relu5out); - - void* maxpool5out = tensorPooling(relu5out, 0, 3, 3, 0, 0, 2, 2); - printTensorDims(maxpool5out); - - /********* End of Conv layer 5 *******/ - - // Test-point: I suspect the data may not be layed out correct (either in file or after loading) - void* fc1_weights = readTrainedWeights("../alexnet/params/fc1.bin", - CUDNN_DATA_FLOAT, 1, 1, 256*6*6, 4096); - void* gemm1out = tensorGemm(maxpool5out, fc1_weights); - printTensorDims(gemm1out); - - void* bias = readTrainedWeights("../alexnet/params/fc1.bias.bin", - CUDNN_DATA_FLOAT, 1, 1, 1, 4096); - - void* gemm1biasout = tensorGemmBias(gemm1out, bias); - printTensorDims(gemm1biasout); - - void* relu6out = tensorRelu(gemm1biasout); - printTensorDims(relu6out); - - /***** End of FC1 layer *********/ - - void* fc2_weights = readTrainedWeights("../alexnet/params/fc2.bin", - CUDNN_DATA_FLOAT, 1, 1, 4096, 4096); - void* gemm2out = tensorGemm(relu6out, fc2_weights); - printTensorDims(gemm2out); - - void* bias2 = readTrainedWeights("../alexnet/params/fc2.bias.bin", - CUDNN_DATA_FLOAT, 1, 1, 1, 4096); - void* gemm2biasout = tensorGemmBias(gemm2out, bias2); - printTensorDims(gemm2biasout); - - void* relu7out = tensorRelu(gemm2biasout); - printTensorDims(relu7out); - - /***** End of FC2 layer *********/ - - void* fc3_weights = readTrainedWeights("../alexnet/params/fc3.bin", - 
CUDNN_DATA_FLOAT, 1, 1, 4096, 1000); - void* gemm3out = tensorGemm(relu7out, fc3_weights); - printTensorDims(gemm3out); - - void* bias3 = readTrainedWeights("../alexnet/params/fc3.bias.bin", - CUDNN_DATA_FLOAT, 1, 1, 1, 1000); - void* gemm3biasout = tensorGemmBias(gemm3out, bias3); - printTensorDims(gemm3biasout); - - /******** End of FC3 Layer ***********/ - void* result = tensorSoftmax(gemm3biasout); - printTensorDims(result); - - // FIXIT: Pass file with the labels - printLikelihood("", test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph - printf("END of Alexnet3 -- \n"); -} - - - - - -int main(){ - - // IMP-NOTE: Always initialize the runtime - initializeRuntime(0); - - //testAlexnet1(); - //testAlexnet2(); - testAlexnet3(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc deleted file mode 100644 index c0fee9b659db9ff45f56b75b989fbbed68523d43..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_fc_half.cc +++ /dev/null @@ -1,74 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testFC_half(){ - - printf("********* Fully Connected DNN-1 ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - - void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - // Start execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorHgemm(input, fc1_weights); - printTensorDims(fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - printTensorDims(fc1_bias_out); - - void* fc1_relu = tensorRelu(fc1_bias_out); - printTensorDims(fc1_relu); - - // Layer-2 - void* fc2out = tensorHgemm(fc1_relu, fc2_weights); - printTensorDims(fc2out); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - printTensorDims(fc2_bias_out); - - void* fc2_relu = tensorRelu(fc2_bias_out); - printTensorDims(fc2_relu); - - void* result = tensorSoftmax(fc2_relu); - printTensorDims(result); - - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", - test_batch_size, result); -} - - - -int main(){ - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - testFC_half(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc deleted file mode 100644 index d5211be3918adcd030fc40c13cba1ff0d7c53c18..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2.cc +++ /dev/null @@ -1,112 +0,0 @@ - -#include <stdio.h> 
-#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenet2Arch(){ - - printf("********* Lenet-2 Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - printTensorDims(conv1out); - - void* conv1_reluout = tensorRelu(conv1out); - //dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout); - - void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool1out); - //dumpWeightsToFile("tensors_out/pool1.out", pool1out); - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - printTensorDims(conv2out); - - void* conv2_reluout = tensorRelu(conv2out); - //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout); - - void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool2out); - //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); - - void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); - printTensorDims(gemm1out); - //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - printTensorDims(gemm1biasout); - - void* relu1out = tensorRelu(gemm1biasout); - printTensorDims(relu1out); - - void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); - printTensorDims(gemm2out); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - printTensorDims(gemm2_biasout); - - void* result = tensorSoftmax(gemm2_biasout); - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", - test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph -} - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - testLenet2Arch(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc deleted file mode 100644 index 358cb6a75b8e63ca0a0bd964c9f73f2d16c39b4f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet2_promise.cc +++ /dev/null @@ -1,113 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenet2Arch(){ - - printf("********* Lenet-2 Architecture ********** \n"); - - int test_batch_size = 10000; - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet2_params/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet2_params/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet2_params/conv2.bin", - 
float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet2_params/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet2_params/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet2_params/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet2_params/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet2_params/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - dumpWeightsToFile("tensors_out/conv1_out.out", conv1out); - - tensorAdd(conv1out, conv1_bias); // NOTE: In-place operation - printTensorDims(conv1out); - - dumpWeightsToFile("tensors_out/conv1_bias_add.out", conv1out); - - void* conv1_reluout = tensorRelu(conv1out); - dumpWeightsToFile("tensors_out/conv1_relu.out", conv1_reluout); - - void* pool1out = tensorPooling(conv1_reluout, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool1out); - dumpWeightsToFile("tensors_out/conv1_pool.out", pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - printTensorDims(conv2out); - - void* conv2_reluout = tensorRelu(conv2out); - //dumpWeightsToFile("tensors_out/conv2.out", conv2_reluout); - - void* pool2out = tensorPooling(conv2_reluout, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool2out); - //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); - - void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); - printTensorDims(gemm1out); - //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - printTensorDims(gemm1biasout); - - void* relu1out = tensorRelu(gemm1biasout); - printTensorDims(relu1out); - - void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); - printTensorDims(gemm2out); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - printTensorDims(gemm2_biasout); - - void* result = tensorSoftmax(gemm2_biasout); - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", - test_batch_size, result); -} - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - testLenet2Arch(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc deleted file mode 100644 index 42e364289e499d92591692a04e42988fd1a66dc5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test/test_lenet_acc.cc +++ /dev/null @@ -1,109 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../../tensor_runtime/include/tensor_runtime.h" -#include "../../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenet2Arch(){ - - printf("********* Lenet-2 Architecture ********** \n"); - - int test_batch_size = 1000; 
- void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_test_params/conv1.bin", - float_type, 32, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_test_params/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_test_params/conv2.bin", - float_type, 64, 32, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_test_params/conv2_bias.bin", - float_type, 1, 64, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_test_params/fc1.bin", - float_type, 1, 1, 7*7*64, 1024); - void* fc1_bias = readTrainedWeights("../model_params/lenet_test_params/fc1_bias.bin", - float_type, 1, 1024, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/lenet_test_params/fc2.bin", - float_type, 1, 1, 1024, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_test_params/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - printTensorDims(conv1out); - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool1out); - //dumpWeightsToFile("tensors_out/pool1.out", pool1out); - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - printTensorDims(conv2out); - - //void* conv2_reluout = tensorRelu(conv2out); - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool2out); - //dumpWeightsToFile("tensors_out/maxpool2.out", pool2out); - - void* gemm1out = tensorGemmGPU(pool2out, fc1_weights); - printTensorDims(gemm1out); - //dumpWeightsToFile("tensors_out/gemm1.out", gemm1out); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - printTensorDims(gemm1biasout); - - void* relu1out = tensorRelu(gemm1biasout); - printTensorDims(relu1out); - - void* gemm2out = tensorGemmGPU(relu1out, fc2_weights); - printTensorDims(gemm2out); - - void* gemm2_biasout = tensorAdd(gemm2out, fc2_bias); - printTensorDims(gemm2_biasout); - - void* result = tensorSoftmax(gemm2_biasout); - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", - test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph -} - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - testLenet2Arch(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc deleted file mode 100644 index e8b70146a10359bf2df7420ae388325e6a658557..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network.cc +++ /dev/null @@ -1,152 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - - -void testFCNetworkArchCPU(){ - - printf("********* Fully Connected DNN-1 ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - //dumpWeightsToFile("tensors_out/input_fc.out", input); - //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); - - printTensorDims(input); - printTensorDims(fc1_weights); - - // Start profiling tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmCPU(input, fc1_weights); - printTensorDims(fc1out); - //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); - printTensorDims(fc1_bias_out); - - void* fc1_relu = tensorRelu(fc1_bias_out); - //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); - printTensorDims(fc1_relu); - - // Layer-2 - void* fc2out = tensorGemmCPU(fc1_relu, fc2_weights); - //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); - printTensorDims(fc2out); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); - printTensorDims(fc2_bias_out); - - void* fc2_relu = tensorRelu(fc2_bias_out); - //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); - printTensorDims(fc2_relu); - - void* result = tensorSoftmax(fc2_relu); - printTensorDims(result); - - // stopProfiling - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph -} - - - -/* NOTE: Reference Architecture to use for profiling */ -void testFCNetworkArchGPU(){ - - printf("********* Fully Connected DNN-1 ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readTrainedWeights("../model_params/FC_network2/mnist_float_input.bin", - float_type, test_batch_size, 1, 28, 28); - - void* fc1_weights = readTrainedWeights("../model_params/FC_network2/fc1.bin", - float_type, 1, 1, 784, 128); - void* fc1_bias = readTrainedWeights("../model_params/FC_network2/fc1_bias.bin", - float_type, 1, 128, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network2/fc2.bin", - float_type, 1, 1, 128, 10); - void* fc2_bias = readTrainedWeights("../model_params/FC_network2/fc2_bias.bin", - float_type, 1, 10, 1, 1); - - // Start 
execution profiling Tensor ops - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - printTensorDims(fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - //dumpWeightsToFile("tensors_out/fc1_biasout_fc.out", fc1_bias_out); - printTensorDims(fc1_bias_out); - - void* fc1_relu = tensorRelu(fc1_bias_out); - //dumpWeightsToFile("tensors_out/fc1_relu_fc.out", fc1_relu); - printTensorDims(fc1_relu); - - // IMPORTANT: Adding errors to the FC1 layer output - //tensorAddError(fc1_relu, 3); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - //dumpWeightsToFile("tensors_out/fc2out_fc.out", fc2out); - printTensorDims(fc2out); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - //dumpWeightsToFile("tensors_out/fc2_biasout_fc.out", fc2_bias_out); - printTensorDims(fc2_bias_out); - - void* fc2_relu = tensorRelu(fc2_bias_out); - //dumpWeightsToFile("tensors_out/fc2_relu_fc.out", fc2_relu); - printTensorDims(fc2_relu); - - void* result = tensorSoftmax(fc2_relu); - printTensorDims(result); - - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph -} - - - -int main(){ - - // This initializes the runtime - must be called before anything - llvm_hpvm_initTensorRt(0); - - //testFCNetworkArchCPU(); - - testFCNetworkArchGPU(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc deleted file mode 100644 index fc00532a1b3712fab9d098a9a8e1a1586f1458a5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network2.cc +++ /dev/null @@ -1,94 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - -void test4LayerFC(){ - - printf("********* 4-layer FC Network ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - float_type, - test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/FC_network1/fc1.bin", - float_type, 1, 1, 784, 1000); - void* fc1_bias = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin", - float_type, 1, 1000, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network1/fc2.bin", - float_type, 1, 1, 1000, 500); - void* fc2_bias = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin", - float_type, 1, 500, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/FC_network1/fc3.bin", - float_type, 1, 1, 500, 200); - void* fc3_bias = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin", - float_type, 1, 200, 1, 1); - void* fc4_weights = readTrainedWeights("../model_params/FC_network1/fc4.bin", - float_type, 1, 1, 200, 10); - void* fc4_bias = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - //dumpWeightsToFile("tensors_out/input_fc.out", input); - //dumpWeightsToFile("tensors_out/fc1_w_fc.out", fc1_weights); - - // Start Profiling execution times of Tensor operations - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, 
fc1_weights); - printTensorDims(fc1out); - //dumpWeightsToFile("tensors_out/fc1out_fc.out", fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - printTensorDims(fc1_bias_out); - //dumpWeightsToFile("tensors_out/fc_fc1.out", fc1_bias_out); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_bias_out, fc2_weights); - printTensorDims(fc2out); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - printTensorDims(fc2_bias_out); - - // Layer-3 - void* fc3out = tensorGemmGPU(fc2_bias_out, fc3_weights); - printTensorDims(fc3out); - - void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); - printTensorDims(fc3_bias_out); - - // Layer-4 - void* fc4out = tensorGemmGPU(fc3_bias_out, fc4_weights); - printTensorDims(fc4out); - - void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); - printTensorDims(fc4_bias_out); - - void* result = tensorSoftmax(fc4_bias_out); - printTensorDims(result); - - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); -} - - - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - test4LayerFC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc deleted file mode 100644 index 531bb01695cddb70de0f9bea90f6b229679e9bce..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_fc_network3.cc +++ /dev/null @@ -1,93 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" -#include "../include/types.h" - - -void test4LayerFC(){ - - printf("********* 4-layer FC Network ********* \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - float_type, - test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/FC_network3/fc1.bin", - float_type, 1, 1, 784, 512); - void* fc1_bias = readTrainedWeights("../model_params/FC_network3/fc1_bias.bin", - float_type, 1, 512, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network3/fc2.bin", - float_type, 1, 1, 512, 256); - void* fc2_bias = readTrainedWeights("../model_params/FC_network3/fc2_bias.bin", - float_type, 1, 256, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/FC_network3/fc3.bin", - float_type, 1, 1, 256, 128); - void* fc3_bias = readTrainedWeights("../model_params/FC_network3/fc3_bias.bin", - float_type, 1, 128, 1, 1); - void* fc4_weights = readTrainedWeights("../model_params/FC_network3/fc4.bin", - float_type, 1, 1, 128, 10); - void* fc4_bias = readTrainedWeights("../model_params/FC_network3/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - // Start Profiling execution times of Tensor operations - startProfiling(); - - // Layer-1 - void* fc1out = tensorGemmGPU(input, fc1_weights); - printTensorDims(fc1out); - - void* fc1_bias_out = tensorAdd(fc1out, fc1_bias); - printTensorDims(fc1_bias_out); - void* fc1_relu = tensorRelu(fc1_bias_out); - - // Layer-2 - void* fc2out = tensorGemmGPU(fc1_relu, fc2_weights); - printTensorDims(fc2out); - - void* fc2_bias_out = tensorAdd(fc2out, fc2_bias); - printTensorDims(fc2_bias_out); - void* fc2_relu = tensorRelu(fc2_bias_out); - - // Layer-3 - void* fc3out = tensorGemmGPU(fc2_relu, fc3_weights); - printTensorDims(fc3out); 
- - void* fc3_bias_out = tensorAdd(fc3out, fc3_bias); - printTensorDims(fc3_bias_out); - void* fc3_relu = tensorRelu(fc3_bias_out); - - // Layer-4 - void* fc4out = tensorGemmGPU(fc3_relu, fc4_weights); - printTensorDims(fc4out); - - void* fc4_bias_out = tensorAdd(fc4out, fc4_bias); - printTensorDims(fc4_bias_out); - void* fc4_relu = tensorRelu(fc4_bias_out); - - void* result = tensorSoftmax(fc4_relu); - printTensorDims(result); - - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); -} - - - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - test4LayerFC(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc deleted file mode 100644 index e21b09fbf59c6ceee2adcf6df798ef04351a03ef..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/test_lenet.cc +++ /dev/null @@ -1,178 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -/* NOTE: Reference Architecture to use for profiling */ -void testLenetArch2(){ - - printf("********* Lenet Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin", - CUDNN_DATA_FLOAT, 20, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin", - CUDNN_DATA_FLOAT, 1, 20, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin", - CUDNN_DATA_FLOAT, 50, 20, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin", - CUDNN_DATA_FLOAT, 1, 50, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin", - CUDNN_DATA_FLOAT, 1, 1, 800, 500); - void* fc1_bias = readTrainedWeights("../model_params/lenet_params/ip1.bias.bin", - CUDNN_DATA_FLOAT, 1, 1, 1, 500); - void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin", - CUDNN_DATA_FLOAT, 1, 1, 500, 10); - void* fc2_bias = readTrainedWeights("../model_params/lenet_params/ip2.bias.bin", - CUDNN_DATA_FLOAT, 1, 1, 1, 10); - - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1, - conv_mode, conv_precision); - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - printTensorDims(conv1out); - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - printTensorDims(conv2out); - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool2out); - - void* gemm1out = tensorGemm(pool2out, fc1_weights); - printTensorDims(gemm1out); - - void* gemm1biasout = tensorGemmBias(gemm1out, fc1_bias); - printTensorDims(gemm1biasout); - - void* relu1out = tensorRelu(gemm1biasout); - printTensorDims(relu1out); - - void* gemm2out = tensorGemm(relu1out, fc2_weights); - printTensorDims(gemm2out); - - void* gemm2_biasout = tensorGemmBias(gemm2out, fc2_bias); - printTensorDims(gemm2_biasout); - - void* result = tensorSoftmax(gemm2_biasout); - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph -} - - -/* This architecture REMOVES the bias adds */ -void testLenetArch3(){ - - printf("********* Lenet Architecture ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 10000; - - void* input = readInputTensor("../model_params/lenet_params/datasets/t10k-images-idx3-ubyte", - CUDNN_DATA_FLOAT, - test_batch_size, 1, 28, 28); - // NOTE: Filter descriptors do NOT have batch size - // NOTE: First two dims are output channels (configurable), input channels (MUST match input channels) - // IMP: The output channels matches the trained model - not the Lenet arch proposed in Andrew Ng's class - void* conv1_filter = readTrainedWeights("../model_params/lenet_params/conv1.bin", - CUDNN_DATA_FLOAT, 20, 1, 5, 5); - void* conv1_bias = readTrainedWeights("../model_params/lenet_params/conv1.bias.bin", - CUDNN_DATA_FLOAT, 1, 20, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/lenet_params/conv2.bin", - CUDNN_DATA_FLOAT, 50, 20, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/lenet_params/conv2.bias.bin", - CUDNN_DATA_FLOAT, 1, 50, 1, 1); - void* fc1_weights = readTrainedWeights("../model_params/lenet_params/ip1.bin", - CUDNN_DATA_FLOAT, 1, 1, 800, 500); - void* fc2_weights = readTrainedWeights("../model_params/lenet_params/ip2.bin", - CUDNN_DATA_FLOAT, 1, 1, 500, 10); - - /* Convolution specific parameters */ - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - void* conv1out = tensorConvolution(input, conv1_filter, 0, 0, 1, 1, - conv_mode, conv_precision); - // NOTE: For tensorAdd, the only dimension that MUST match is channels - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - printTensorDims(conv1out); - - void* pool1out = tensorPooling(conv1out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool1out); - - // NOTE: input channels have to match between tensor op inputs and outputs - void* conv2out = tensorConvolution(pool1out, conv2_filter, 0, 0, 1, 1, - conv_mode, conv_precision); - printTensorDims(conv2out); - - void* pool2out = tensorPooling(conv2out, 0, 2, 2, 0, 0, 2, 2); - printTensorDims(pool2out); - - void* gemm1out = tensorGemm(pool2out, fc1_weights); - printTensorDims(gemm1out); - - void* relu1out = tensorRelu(gemm1out); - printTensorDims(relu1out); - - void* gemm2out = tensorGemm(relu1out, fc2_weights); - printTensorDims(gemm2out); - - void* result = tensorSoftmax(gemm2out); - printTensorDims(result); - - computeAccuracy("../model_params/lenet_params/datasets/t10k-labels-idx1-ubyte", test_batch_size, result); - // THINK: I believe that comparing the results do not need to be part of the HPVM graph - printf("END of Lenet Arch3 -- \n"); -} - - -int main(){ - - llvm_hpvm_initTensorRt(0); - - //testTensorAdd(); - //testTensorConv(); - //testTensorPool(); - //testTensorGemm(); - //testTensorGemmBias(); - //testTensorRelu(); - //testTensorSoftmax(); - - //testLenetArch(); - testLenetArch2(); - //testLenetArch3(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc deleted file mode 100644 index 08e5817fc4aa037bc59cceafc1baba382696e329..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet2_cifar10_tuner.cc +++ /dev/null @@ -1,162 +0,0 @@ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> - -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -int total_runs = 1; - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - printf("********* Alexnet2 CIFAR-10 DNN ********** \n"); - - std::string dir_prefix = std::string("../model_params/alexnet2_cifar10/"); - std::string input_path = dir_prefix + std::string("norm_cifar_input.bin"); - std::string labels_path = dir_prefix + std::string("test_labels.bin"); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv1.bin", - float_type, 32, 3, 3, 3); - void* conv1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv1_bias.bin", - float_type, 1, 32, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv2.bin", - float_type, 32, 32, 3, 3); - void* conv2_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv2_bias.bin", - float_type, 1, 32, 1, 1); - void* conv3_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv3.bin", - float_type, 64, 32, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv3_bias.bin", - float_type, 1, 64, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv4.bin", - float_type, 64, 64, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv4_bias.bin", - float_type, 1, 64, 1, 1); - void* 
conv5_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv5.bin", - float_type, 128, 64, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv5_bias.bin", - float_type, 1, 128, 1, 1); - void* conv6_filter = readTrainedWeights("../model_params/alexnet2_cifar10/conv6.bin", - float_type, 128, 128, 3, 3); - void* conv6_bias = readTrainedWeights("../model_params/alexnet2_cifar10/conv6_bias.bin", - float_type, 1, 128, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet2_cifar10/fc1.bin", - float_type, 1, 1, 2048, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet2_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); - - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum - - - startMemTracking(); - - int test_input_size = 500; - int batch_size = 500; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - - // NOTE: Starting time profiling - startProfiling(); - - for(int j = 0; j < total_runs; j++){ - - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* conv1out = tensorConvolution(input, conv1_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv1out, conv1_bias); - void* conv1_tanh = tensorTanh(conv1out); - - // 2nd Layer - void* conv2out = tensorConvolution(conv1_tanh, conv2_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); - void* conv2_tanh = tensorTanh(conv2out); - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); - void* conv4_tanh = tensorTanh(conv4out); - void* pool4out = tensorPooling(conv4_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 5th Layer - void* conv5out = tensorConvolution(pool4out, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv5out, conv5_bias); - void* conv5_tanh = tensorTanh(conv5out); - - // 6th Layer - void* conv6out = tensorConvolution(conv5_tanh, conv6_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv6out, conv6_bias); - - void* conv6_tanh = tensorTanh(conv6out); - void* pool6out = tensorPooling(conv6_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool6out, fc1_weights); - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - void* result = tensorSoftmax(gemm1biasout); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels, batch_size, result); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - - dumpExecutionAccuracies(); - - -} - - - -int main(int argc, char* argv[]){ - - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - llvm_hpvm_initTensorRt(0); - - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} - diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc 
b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc deleted file mode 100644 index d45cfa9ef3294c4c588b3abb98100dd8391529b7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/alexnet_cifar10_tuner.cc +++ /dev/null @@ -1,123 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -int main(int argc, char* argv[]){ - - int total_runs = 1; - - if (argc > 1){ - printf("argv[1] = %s \n", argv[1]); - total_runs = atoi(argv[1]); - printf("total_runs %d \n", total_runs); - } - - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); - //std::string input_path = dir_prefix + std::string("alexnet_calib.bin"); - //std::string labels_path = dir_prefix + std::string("alexnet_train_labels.bin"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 500; - int batch_size = 500; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - - - // NOTE: Starting time profiling - startProfiling(); - - for(int j = 0; j < total_runs; j++){ - - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = (i * batch_size) + offset; - int end = (i + 1) * batch_size + offset; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 
1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorTanh(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); - void* var_6 = tensorAdd(var_5, conv2d_2_b); - void* var_7 = tensorTanh(var_6); - void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); - void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorTanh(var_11); - void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_14 = tensorAdd(var_13, conv2d_4_b); - void* var_15 = tensorTanh(var_14); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorTanh(var_17); - void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); - void* var_22 = tensorGemmGPU(var_19, dense_1_w); - void* var_23 = tensorAdd(var_22, dense_1_b); - void* var_24 = tensorSoftmax(var_23); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_24); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc deleted file mode 100644 index 689e241c5b4a0a5e1c5b98326998f37d5e803f75..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/resnet18_cifar10_tuner.cc +++ /dev/null @@ -1,265 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - - -int main(int argc, char* argv[]){ - - int total_runs = 1; - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(0); - - - /*int skip_tensor_ids[22]; - skip_tensor_ids[0] = 0; - skip_tensor_ids[1] = 1; - skip_tensor_ids[2] = 1; - skip_tensor_ids[3] = 3; - skip_tensor_ids[4] = 4; - skip_tensor_ids[5] = 4; - skip_tensor_ids[6] = 10; - skip_tensor_ids[7] = 11; - skip_tensor_ids[8] = 17; - skip_tensor_ids[9] = 18; - skip_tensor_ids[10] = 24; - skip_tensor_ids[11] = 25; - skip_tensor_ids[12] = 25; - skip_tensor_ids[13] = 33; - skip_tensor_ids[14] = 34; - skip_tensor_ids[15] = 35; - skip_tensor_ids[16] = 40; - skip_tensor_ids[17] = 41; - skip_tensor_ids[18] = 47; - skip_tensor_ids[19] = 48; - //--- readSkipTensors(skip_tensor_ids, 22); - //-- readSkipTensors(skip_tensor_ids, 10); - readSkipTensors(skip_tensor_ids, 20); - */ - - std::string dir_prefix = std::string("../model_params/resnet18_cifar10_promise/"); - // std::string input_path = dir_prefix + std::string("resnet18_calib.bin"); - // std::string labels_path = dir_prefix + std::string("resnet18_train_labels.bin"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,16,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* 
conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,16,16,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,16,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,32,16,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,32,16,1,1); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_13_w_path = dir_prefix + 
std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_14_w_path = dir_prefix + std::string("conv2d_14_w.bin"); - void* conv2d_14_w = readTrainedWeights(conv2d_14_w_path.c_str(), 0,32,32,3,3); - std::string conv2d_14_b_path = dir_prefix + std::string("conv2d_14_b.bin"); - void* conv2d_14_b = readTrainedWeights(conv2d_14_b_path.c_str(), 0,1,32,1,1); - std::string conv2d_15_w_path = dir_prefix + std::string("conv2d_15_w.bin"); - void* conv2d_15_w = readTrainedWeights(conv2d_15_w_path.c_str(), 0,64,32,3,3); - std::string conv2d_15_b_path = dir_prefix + std::string("conv2d_15_b.bin"); - void* conv2d_15_b = readTrainedWeights(conv2d_15_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_17_w_path = dir_prefix + std::string("conv2d_17_w.bin"); - void* conv2d_17_w = readTrainedWeights(conv2d_17_w_path.c_str(), 0,64,32,1,1); - std::string conv2d_17_b_path = dir_prefix + std::string("conv2d_17_b.bin"); - void* conv2d_17_b = readTrainedWeights(conv2d_17_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_16_w_path = dir_prefix + std::string("conv2d_16_w.bin"); - void* conv2d_16_w = readTrainedWeights(conv2d_16_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_16_b_path = dir_prefix + std::string("conv2d_16_b.bin"); - void* conv2d_16_b = readTrainedWeights(conv2d_16_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_18_w_path = dir_prefix + std::string("conv2d_18_w.bin"); - void* conv2d_18_w = readTrainedWeights(conv2d_18_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_18_b_path = dir_prefix + std::string("conv2d_18_b.bin"); - void* conv2d_18_b = readTrainedWeights(conv2d_18_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_19_w_path = dir_prefix + std::string("conv2d_19_w.bin"); - void* conv2d_19_w = readTrainedWeights(conv2d_19_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_19_b_path = dir_prefix + std::string("conv2d_19_b.bin"); - void* conv2d_19_b = readTrainedWeights(conv2d_19_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_20_w_path = dir_prefix + std::string("conv2d_20_w.bin"); - void* conv2d_20_w = readTrainedWeights(conv2d_20_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_20_b_path = dir_prefix + std::string("conv2d_20_b.bin"); - void* conv2d_20_b = readTrainedWeights(conv2d_20_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_21_w_path = dir_prefix + std::string("conv2d_21_w.bin"); - void* conv2d_21_w = readTrainedWeights(conv2d_21_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_21_b_path = dir_prefix + std::string("conv2d_21_b.bin"); - void* conv2d_21_b = readTrainedWeights(conv2d_21_b_path.c_str(), 0,1,64,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,64,10); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 500; - int batch_size = 500; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - - for(int j = 0; j < total_runs; j++){ - - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - 
void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_2 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_3 = tensorAdd(var_2, conv2d_1_b); - void* var_4 = tensorRelu(var_3); - void* var_6 = tensorConvolution(var_4, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_7 = tensorAdd(var_6, conv2d_2_b); - void* var_8 = tensorRelu(var_7); - void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorAdd(var_4, var_11); - void* var_13 = tensorRelu(var_12); - void* var_15 = tensorConvolution(var_13, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_16 = tensorAdd(var_15, conv2d_4_b); - void* var_17 = tensorRelu(var_16); - void* var_19 = tensorConvolution(var_17, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_20 = tensorAdd(var_19, conv2d_5_b); - void* var_21 = tensorAdd(var_13, var_20); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_6_b); - void* var_26 = tensorRelu(var_25); - void* var_28 = tensorConvolution(var_26, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_7_b); - void* var_30 = tensorAdd(var_22, var_29); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvolution(var_31, conv2d_8_w, 1, 1, 2, 2, 1, 0); - void* var_34 = tensorAdd(var_33, conv2d_8_b); - void* var_35 = tensorRelu(var_34); - void* var_37 = tensorConvolution(var_35, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_38 = tensorAdd(var_37, conv2d_9_b); - void* var_40 = tensorConvolution(var_31, conv2d_10_w, 0, 0, 2, 2, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_10_b); - void* var_42 = tensorAdd(var_41, var_38); - void* var_43 = tensorRelu(var_42); - void* var_45 = tensorConvolution(var_43, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_46 = tensorAdd(var_45, conv2d_11_b); - void* var_47 = tensorRelu(var_46); - void* var_49 = tensorConvolution(var_47, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_50 = tensorAdd(var_49, conv2d_12_b); - void* var_51 = tensorAdd(var_43, var_50); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvolution(var_52, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_55 = tensorAdd(var_54, conv2d_13_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorConvolution(var_56, conv2d_14_w, 1, 1, 1, 1, 1, 0); - void* var_59 = tensorAdd(var_58, conv2d_14_b); - void* var_60 = tensorAdd(var_52, var_59); - void* var_61 = tensorRelu(var_60); - void* var_63 = tensorConvolution(var_61, conv2d_15_w, 1, 1, 2, 2, 1, 0); - void* var_64 = tensorAdd(var_63, conv2d_15_b); - void* var_65 = tensorRelu(var_64); - void* var_67 = tensorConvolution(var_65, conv2d_16_w, 1, 1, 1, 1, 1, 0); - void* var_68 = tensorAdd(var_67, conv2d_16_b); - void* var_70 = tensorConvolution(var_61, conv2d_17_w, 0, 0, 2, 2, 1, 0); - void* var_71 = tensorAdd(var_70, conv2d_17_b); - void* var_72 = tensorAdd(var_71, var_68); - void* var_73 = tensorRelu(var_72); - void* var_75 = tensorConvolution(var_73, conv2d_18_w, 1, 1, 1, 1, 1, 0); - void* var_76 = tensorAdd(var_75, conv2d_18_b); - void* var_77 = tensorRelu(var_76); - void* var_79 = tensorConvolution(var_77, conv2d_19_w, 1, 1, 1, 1, 1, 0); - void* var_80 = tensorAdd(var_79, conv2d_19_b); - void* var_81 = tensorAdd(var_73, var_80); - void* var_82 = tensorRelu(var_81); - void* var_84 = tensorConvolution(var_82, conv2d_20_w, 1, 1, 1, 1, 1, 0); - void* var_85 = tensorAdd(var_84, conv2d_20_b); - void* var_86 = tensorRelu(var_85); - void* var_88 
= tensorConvolution(var_86, conv2d_21_w, 1, 1, 1, 1, 1, 0); - void* var_89 = tensorAdd(var_88, conv2d_21_b); - void* var_90 = tensorAdd(var_82, var_89); - void* var_91 = tensorRelu(var_90); - void* var_92 = tensorPooling(var_91,1,8,8,0,0,8,8); - void* var_94 = tensorGemmGPU(var_92, dense_1_w); - void* var_95 = tensorAdd(var_94, dense_1_b); - void* var_96 = tensorSoftmax(var_95); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_96); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc deleted file mode 100644 index 56e0e7016f16ce6548d9947e13fda96a931e436b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_top5_tuner.cc +++ /dev/null @@ -1,167 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - //std::string input_path = dir_prefix + std::string("vgg16_cifar100_calib.bin"); - //std::string labels_path = dir_prefix + std::string("vgg16_cifar100_train_labels.bin"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = 
readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - startMemTracking(); - - int test_input_size = 4000; - int batch_size = 4000; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* 
var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - //float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); - float accuracy = computeTop5Accuracy(labels, batch_size, var_60, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc deleted file mode 100644 index 18e419553641160d59930a72695ec0a191c06d74..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar100_tuner.cc +++ /dev/null @@ -1,166 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ 
- - llvm_hpvm_initTensorRt(0); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar100_front/"); - //std::string input_path = dir_prefix + std::string("vgg16_cifar100_calib.bin"); - //std::string labels_path = dir_prefix + std::string("vgg16_cifar100_train_labels.bin"); - - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - 
std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,100); - std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1); - - - startMemTracking(); - - int test_input_size = 5000; - int batch_size = 5000; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = 
tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - - float accuracy = computeAccuracy2(labels, batch_size, var_60, 100); - final_accuracy += accuracy; - freeBatchMemory(); - - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - llvm_hpvm_cleanupTensorRt(); - - return 0; -} diff --git a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc b/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc deleted file mode 100644 index 552001ba7af481845f75cd95e3249bc7ba7d0a97..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/dnn_sources/src/legacy/tuner/vgg16_cifar10_tuner.cc +++ /dev/null @@ -1,180 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - - -int main(int argc, char* argv[]){ - - int total_runs = 1; - if (argc > 1){ - total_runs = atoi(argv[1]); - } - - - llvm_hpvm_initTensorRt(1); - - std::string dir_prefix = std::string("../model_params/vgg16_cifar10_2/"); - //std::string input_path = dir_prefix + std::string("vgg16_cifar_calib.bin"); - //std::string labels_path = dir_prefix + std::string("vgg16_train_labels.bin"); - std::string input_path = dir_prefix + std::string("input.bin"); - std::string labels_path = dir_prefix + std::string("labels.bin"); - - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); - std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); - std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); - void* conv2d_4_w = 
readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); - void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); - void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); - void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); - void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); - void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); - std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); - void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); - void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); - void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); - void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); - void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); - void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); - void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); - void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); - void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); - std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); - void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); - std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); - void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); - std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); - std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); - std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); - void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); - std::string dense_2_b_path = dir_prefix + 
std::string("dense_2_b.bin"); - void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 500; - int batch_size = 500; - int offset = 5000; - - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - - for(int j = 0; j < total_runs; j++){ - - float final_accuracy = 0.0; - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size + offset; - int end = (i + 1) * batch_size + offset; - - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); - void* var_5 = tensorAdd(var_4, conv2d_2_b); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); - void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_9 = tensorAdd(var_8, conv2d_3_b); - void* var_10 = tensorRelu(var_9); - void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_13 = tensorAdd(var_12, conv2d_4_b); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorRelu(var_17); - void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); - void* var_21 = tensorAdd(var_20, conv2d_6_b); - void* var_22 = tensorRelu(var_21); - void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); - void* var_25 = tensorAdd(var_24, conv2d_7_b); - void* var_26 = tensorRelu(var_25); - void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); - void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); - void* var_29 = tensorAdd(var_28, conv2d_8_b); - void* var_30 = tensorRelu(var_29); - void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); - void* var_33 = tensorAdd(var_32, conv2d_9_b); - void* var_34 = tensorRelu(var_33); - void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 1, 1, 1, 1, 0); - void* var_37 = tensorAdd(var_36, conv2d_10_b); - void* var_38 = tensorRelu(var_37); - void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); - void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); - void* var_41 = tensorAdd(var_40, conv2d_11_b); - void* var_42 = tensorRelu(var_41); - void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); - void* var_45 = tensorAdd(var_44, conv2d_12_b); - void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); - void* var_49 = tensorAdd(var_48, conv2d_13_b); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); - void* var_54 = tensorGemmGPU(var_51, dense_1_w); - void* var_55 = tensorAdd(var_54, dense_1_b); - void* var_56 = tensorRelu(var_55); - void* var_58 = tensorGemmGPU(var_56, dense_2_w); - void* var_59 = tensorAdd(var_58, dense_2_b); - void* var_60 = tensorSoftmax(var_59); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_60); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - } - - dumpExecutionAccuracies(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git 
a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt deleted file mode 100644 index 095e037430dbf1751dddfd047d0cf0157ad9e2e7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/CMakeLists.txt +++ /dev/null @@ -1,119 +0,0 @@ -cmake_minimum_required (VERSION 2.6) -project (cudnn-training) - -find_package(CUDA 6.5 REQUIRED) - - -if (CMAKE_BUILD_TYPE STREQUAL "Debug") - message("Debug mode") - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-g;-lineinfo;-Xcompiler;-ggdb;-lcurand) -else() - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_60,code=compute_60;-std=c++11;-DNDEBUG;-Xcompiler;-DNDEBUG;-lcurand) -endif() - -set(CUDA_PROPAGATE_HOST_FLAGS OFF) - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -I/ " ) - -add_definitions(-DNO_INJECTION) -add_definitions(-DPROMISE_TUNER_ENABLED) -if(USE_GFLAGS) - add_definitions(-DUSE_GFLAGS) -endif() - -if(USE_AUTOTUNER) - remove_definitions(-DNO_INJECTION) -endif() - - - -include_directories($ENV{CUDNN_PATH} /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/include) -include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/./tensor_runtime/include) -include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../gpu_profiler/include) -include_directories(/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../soc_simulator/include) -link_directories($ENV{CUDNN_PATH} /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/lib /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/$ENV{CUDNN_PATH}/lib64) - - -cuda_add_library(tensor_runtime /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_runtime.cu) -cuda_add_cublas_to_target(tensor_runtime) - -cuda_add_library(tensor_cpu_runtime /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/tensor_runtime/src/tensor_cpu_runtime.cc) - -find_library(GPU_PROFILER_LIB - NAMES libgpu_profiler.a - HINTS /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../gpu_profiler/lib -) - -find_library(SOC_SIMULATOR_LIB - NAMES libpromise_profiler.a - HINTS /home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/../soc_simulator/lib -) - - -if(USE_GFLAGS) - target_link_libraries(tensor_runtime gflags cudnn -lcurand) -else() - target_link_libraries(tensor_runtime cudnn -lcurand) -endif() - -target_link_libraries(tensor_cpu_runtime) - -# lenet_keras_half_autogenerated_knobs -add_executable(lenet_keras_fp16_perf20 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf20.cc) -target_link_libraries(lenet_keras_fp16_perf20 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf26 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf26.cc) -target_link_libraries(lenet_keras_fp16_perf26 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf22 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf22.cc) -target_link_libraries(lenet_keras_fp16_perf22 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf25 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf25.cc) -target_link_libraries(lenet_keras_fp16_perf25 tensor_runtime ${GPU_PROFILER_LIB} 
${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf23 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf23.cc) -target_link_libraries(lenet_keras_fp16_perf23 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp33 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp33.cc) -target_link_libraries(lenet_keras_fp16_samp33 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf24 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf24.cc) -target_link_libraries(lenet_keras_fp16_perf24 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp31 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp31.cc) -target_link_libraries(lenet_keras_fp16_samp31 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf30 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf30.cc) -target_link_libraries(lenet_keras_fp16_perf30 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp36 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp36.cc) -target_link_libraries(lenet_keras_fp16_samp36 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf21 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf21.cc) -target_link_libraries(lenet_keras_fp16_perf21 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp34 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp34.cc) -target_link_libraries(lenet_keras_fp16_samp34 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp32 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp32.cc) -target_link_libraries(lenet_keras_fp16_samp32 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_samp35 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_samp35.cc) -target_link_libraries(lenet_keras_fp16_samp35 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf29 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf29.cc) -target_link_libraries(lenet_keras_fp16_perf29 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf27 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf27.cc) -target_link_libraries(lenet_keras_fp16_perf27 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - -add_executable(lenet_keras_fp16_perf28 lenet_keras_half_autogenerated_knobs/lenet_keras_fp16_perf28.cc) -target_link_libraries(lenet_keras_fp16_perf28 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - - - -# lenet_keras_autogenerated_knobs -add_executable(lenet_keras_fp32_perf20 lenet_keras_autogenerated_knobs/lenet_keras_fp32_perf20.cc) -target_link_libraries(lenet_keras_fp32_perf20 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}) - - diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py deleted file mode 100644 index d787af8ec350b7fa2f2eeb2b0ed4c3ae4c015c95..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_different_clock_frequencies_testing_automator.py +++ /dev/null @@ -1,139 +0,0 @@ -# Automates online 
benchmark testing with different clock speeds -# Input: GPU clock speed, DDR clock speed, set of benchmark names to test -# Set of benchmarks format: (full_bin_name, half_bin_name) -import os -import sys - -from subprocess import Popen, PIPE - -def set_clock_speeds(gpu_speed_mhz, ddr_speed_mhz): - def find_closest_clock_speed(goal_speed): - # Reads /sys/devices/17000000.gp10b/devfreq/17000000.gp10b/available_frequencies - # and finds the closest clock speed - AVAIL_FREQS = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/available_frequencies" - avail_freqs_file = open(AVAIL_FREQS, "r") - avail_speeds_lst = avail_freqs_file.read().strip().split() - avail_freqs_file.close() - - min_diff = abs(gpu_speed - int(avail_speeds_lst[0])) - closest_speed = int(avail_speeds_lst[0]) - for avail_speed in avail_speeds_lst[1:]: - avail_speed = int(avail_speed) - curr_diff = abs(gpu_speed - avail_speed) - if curr_diff < min_diff: - min_diff = curr_diff - closest_speed = avail_speed - return closest_speed - - new_conf_filename = 'jetson_clocks_conf%d_%d.txt' % (gpu_speed_mhz, ddr_speed_mhz) - curr_conf_filename = "jetson_clocks_conf_backup.txt" - if os.path.isfile(curr_conf_filename): - os.remove(curr_conf_filename) - - # Get the current configurations in a file - sudo_password = 'nvidia' - p = Popen(['sudo', '/home/nvidia/jetson_clocks.sh', '--store', curr_conf_filename], \ - stdin=PIPE, universal_newlines=True) - p.communicate(sudo_password + '\n') - assert p.returncode == 0 - - # Read the current config file in - curr_conf_file = open(curr_conf_filename, "r") - curr_confs = curr_conf_file.read().strip().split('\n') - curr_conf_file.close() - - GPU_MIN_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/min_freq" - GPU_MAX_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/max_freq" - GPU_CUR_FREQ = "/sys/devices/17000000.gp10b/devfreq/17000000.gp10b/cur_freq" - - DDR_UPDATE_PATH = "/sys/kernel/debug/bpmp/debug/clk/emc/rate" - - # Copy everything in the old configuration except for the GPU/DDR lines - new_conf_file = open(new_conf_filename, "w") - for line in curr_confs: - # Write the GPU clock frequencies at the end to configure the clocks even if - # the current configuration doesn't have one of the lines - if line.startswith(GPU_MIN_FREQ) or line.startswith(GPU_MAX_FREQ) or \ - line.startswith(GPU_CUR_FREQ) or line.startswith(DDR_UPDATE_PATH): - continue - else: - new_conf_file.write("%s\n" % line) - - MHZ_TO_HZ_MULT = 1000000 - gpu_speed = gpu_speed_mhz * MHZ_TO_HZ_MULT - ddr_speed = ddr_speed_mhz * MHZ_TO_HZ_MULT - - # Set GPU - closest_gpu_speed = find_closest_clock_speed(gpu_speed) - print("Setting GPU speed to %d" % closest_gpu_speed) - new_conf_file.write("%s:%d\n" % (GPU_MIN_FREQ, closest_gpu_speed)) - new_conf_file.write("%s:%d\n" % (GPU_MAX_FREQ, closest_gpu_speed)) - #new_conf_file.write("%s:%d\n" % (GPU_CUR_FREQ, closest_gpu_speed)) - - # Set DDR - new_conf_file.write("%s:%d\n" % (DDR_UPDATE_PATH, ddr_speed)) - new_conf_file.close() - - # Set the new configuration - p = Popen(['sudo', '/home/nvidia/jetson_clocks.sh', '--restore', new_conf_filename], \ - stdin=PIPE, universal_newlines=True) - p.communicate(sudo_password + '\n') - assert p.returncode == 0 - print("SUCCESSFULLY SET CLOCK SPEEDS") - - -def run_benchmark(bin_name, should_print_bin_output): - print("RUNNING %s" % bin_name) - proc = Popen("./%s" % bin_name, stdout = PIPE, universal_newlines = True) - proc_output = proc.communicate()[0] - assert proc.returncode == 0 - - if should_print_bin_output: - 
print(proc_output) - print("FINISHED RUNNING %s" % bin_name) - return proc_output - - -def parse_binary_output(proc_output): - avg_time_key_ind = proc_output.find("Average time:") - assert avg_time_key_ind >= 0 - avg_time = proc_output[avg_time_key_ind : proc_output.find("\n", avg_time_key_ind)] - print(avg_time) - return avg_time - - -# Input: a list of tuples of benchmark names -# Can change to input a file containing benchmarks to run -def run_benchmarks(benchmarks_filename, output_filename, should_print_bin_output): - benchmarks_file = open(benchmarks_filename, "r") - output_file = open(output_filename, "w") - - def parse_binary_names_tuple(tuple_line): - tuple_line = tuple_line.replace("(", "").replace(")", "").strip().split(',') - return tuple_line[0].strip(), tuple_line[1].strip() - - for line in benchmarks_file: - full_bin_name, half_bin_name = parse_binary_names_tuple(line) - output_file.write("%s: %s\n" % (full_bin_name, \ - parse_binary_output(run_benchmark(full_bin_name, should_print_bin_output)))) - output_file.write("%s: %s\n" % (half_bin_name, \ - parse_binary_output(run_benchmark(half_bin_name, should_print_bin_output)))) - - benchmarks_file.close() - output_file.close() - - -if __name__ == "__main__": - num_args = len(sys.argv) - - if num_args != 5 and num_args != 6: - print("Usage: python online_benchmark_testing_automator.py <gpu freq in MHz> <ddr freq in MHz> <binary_names_file> <output_file> [1 to print binary output]") - print("Binary names file format: (full_binary_name, half_binary_name)<newline>") - exit(1) - print("GPU clock speed: %s" % sys.argv[1]) - print("DDR clock speed: %s" % sys.argv[2]) - print("Benchmarks file name: %s" % sys.argv[3]) - print("Output file name: %s" % sys.argv[4]) - - set_clock_speeds(int(sys.argv[1]), int(sys.argv[2])) - run_benchmarks(sys.argv[3], sys.argv[4], num_args == 6 and sys.argv[-1] == "1") diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py deleted file mode 100644 index 197b653d3bf6983a9500badcc4766bac1274fb63..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/benchmark_testing_automator.py +++ /dev/null @@ -1,90 +0,0 @@ -# Automates online benchmark testing with different clock speeds -# Input: set of benchmark names to test -# Set of benchmarks format: (full_bin_name, half_bin_name) -import os -import sys - -from collections import defaultdict -from subprocess import Popen, PIPE - -def run_benchmark(bin_name): - print("RUNNING %s" % bin_name) - proc = Popen("./%s" % bin_name, stdout = PIPE, universal_newlines = True) - proc_output = proc.communicate()[0] - assert proc.returncode == 0 - print("FINISHED RUNNING %s" % bin_name) - return proc_output - - -def parse_binary_output(proc_output, per_tensor): - final_acc_key_ind = proc_output.find("**** Final Accuracy") - assert final_acc_key_ind >= 0 - final_acc = proc_output[final_acc_key_ind : proc_output.find("\n", final_acc_key_ind)] - print(final_acc) - - if per_tensor: - first_op_ind = proc_output.find("Operation ") - total_op_ind = proc_output.find('\n', proc_output.find("Total energy")) - assert first_op_ind >= 0 - assert total_op_ind >= 0 - - time_energy_output = proc_output[first_op_ind : total_op_ind] - print(time_energy_output) - return time_energy_output, final_acc - - else: - avg_time_key_ind = proc_output.find("Average time:") - assert avg_time_key_ind >= 0 - - avg_time = 
proc_output[avg_time_key_ind : proc_output.find("\n", avg_time_key_ind)] - print(avg_time) - - return avg_time, final_acc - - -def get_sorted_binaries(builds_dir): - # dict of network names to lists of binaries - # list of binaries should be in sorted order (can do that when we run the benchmarks) - network_bins = defaultdict(list) - for bin_name in os.listdir(builds_dir): - if bin_name.find("profiling") == -1: - continue - network_name = bin_name[ : bin_name.rfind("_")] - network_bins[network_name].append(bin_name) - return network_bins - - -# Input: a list of tuples of benchmark names -# Can change to input a file containing benchmarks to run -def run_benchmarks(sorted_bins, builds_dir, output_filename, per_tensor): - def get_knob_id(bin_name): - return int(bin_name[bin_name.rfind("_") + 1 : ]) - - output_file = open(output_filename, "w", buffering = 0) - for network_name in sorted(sorted_bins.keys()): - # Sort the binaries in order by knob id - sorted_bins[network_name].sort(key = get_knob_id) - print("--------------------------------------") - print(network_name) - output_file.write("--------------------------------------\n%s\n" % network_name) - - # Go through all binaries - for bin_name in sorted_bins[network_name]: - print(bin_name) - binary_output = run_benchmark(os.path.join(builds_dir, bin_name)) - time_energy_output, final_acc = parse_binary_output(binary_output, per_tensor) - output_file.write("%s, %s, %s\n\n" % (bin_name, time_energy_output, final_acc)) - print("--------------------------------------\n") - output_file.write("--------------------------------------\n\n") - output_file.close() - -if __name__ == "__main__": - num_args = len(sys.argv) - - if num_args != 3 and num_args != 4: - print("Usage: python online_benchmark_testing_automator.py <builds dir> <outputs_file_name> [per_tensor]") - print("To delete autogen dirs: python online_benchmark_testing_automator.py clean") - exit(1) - print("Output file name: %s" % sys.argv[2]) - sorted_bins = get_sorted_binaries(sys.argv[1]) - run_benchmarks(sorted_bins, sys.argv[1], sys.argv[2], num_args == 4 and sys.argv[3] == "per_tensor") diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py deleted file mode 100644 index 04f6c5eec378276cd0c89fcc7013cb6996a90f2f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/cmakelists_generator.py +++ /dev/null @@ -1,109 +0,0 @@ -# Generates a CMakeLists.txt file for all generated files in a specific directory -# Input: Arbitrarily long list containing names of all generated files directories -# Ex: alexnet_cifar10_autogenerated_knobs mobilenet_cifar10_autogenerated_knobs -# If inputted 0 parameters: Generates CMakeLists.txt file for all generated files in CURRENT dir - -import sys -import os - -def get_all_generated_directory_names(): - ''' - Returns a list of all generated source code directories (<>_autogenerated_knobs) - in the current directory. Called when program is run with 0 args - ''' - generated_dir_names = [] - for dir_name in os.listdir("."): - print(dir_name) - if dir_name.endswith("autogenerated_knobs"): - generated_dir_names.append(dir_name) - return generated_dir_names - - -def generate_cmakelists_setup(cmakelists_file): - ''' - Copies over all the setup instructions (ex: finding libraries) from a "base" CMakeLists.txt - file. 
Ends copyng when we find the first instance of add_executable - - Args: - cmakelists_file: File object to write cmake instructions to - - Assumption: All setup instructions are being any add_executable instructions - ''' - BASE_CMAKELISTS_PATH = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt" - base_cmakelists_file = open(os.path.join(BASE_CMAKELISTS_PATH, "CMakeLists.txt"), "r") - - find_lib_line = "" - - for line in base_cmakelists_file: - if line.find("add_executable") != -1: - break - - elif line.startswith("#"): - continue - - # Special case: ignore / if -I flag exists - elif line.find("/") != -1 and line.find("-I") == -1: - dot_dot_slash_ind = line.find("../") - dot_slash_ind = line.find("./") - if dot_dot_slash_ind != -1: - start_ind = dot_dot_slash_ind - elif dot_slash_ind != -1: - start_ind = dot_slash_ind - else: - slash_ind = line.find("/") - prev_space_ind = line[:slash_ind].rfind(" ") - start_ind = prev_space_ind + 1 - - old_rel_path = [] - while start_ind < len(line): - if line[start_ind] == ")" or line[start_ind].isspace(): - break - old_rel_path.append(line[start_ind]) - start_ind += 1 - old_rel_path = ''.join(old_rel_path) - if os.path.isabs(old_rel_path): - cmakelists_file.write(line) - else: - new_path = os.path.join(BASE_CMAKELISTS_PATH, old_rel_path) - cmakelists_file.write(line.replace(old_rel_path, new_path)) - continue - cmakelists_file.write(line) - base_cmakelists_file.close() - - -def generate_cmakelists_file(cmakelists_file, source_file_dirs): - generate_cmakelists_setup(cmakelists_file) - LIBRARIES = "tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB}" - cmake_instrs = [] - - for source_file_dir in source_file_dirs: - cmake_instrs.append("# %s" % source_file_dir) - for source_file in os.listdir(source_file_dir): - # Executable name = name of source code file without file extension - file_ext_ind = source_file.find(".cc") - if file_ext_ind == -1: - print("WARNING: Found file with wrong extension. Skipping. 
%s" % source_file) - continue - exec_name = source_file[ : file_ext_ind] - - source_file_path = os.path.join(source_file_dir, source_file) - cmake_instrs.append("add_executable(%s %s)" % (exec_name, source_file_path)) - cmake_instrs.append("target_link_libraries(%s %s)\n" % (exec_name, LIBRARIES)) - cmake_instrs.append("\n") - cmakelists_file.write('\n'.join(cmake_instrs)) - - -if __name__ == "__main__": - num_args = len(sys.argv) - - if num_args >= 2 and sys.argv[1] == "--usage": - print("python cmakelists_generator.py <names of all generated files directories>") - print("If given no parameters: Generates CMakeLists.txt file for all generated files in CURRENT directory") - exit(1) - - cmakelists_file = open("CMakeLists.txt", "w") - if num_args == 1: - generate_cmakelists_file(cmakelists_file, get_all_generated_directory_names()) - else: - generate_cmakelists_file(cmakelists_file, sys.argv[1:]) - cmakelists_file.close() diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt deleted file mode 100644 index 563d7f4a03b3b3a50e2c08c76616a88ea7958b5a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16.txt +++ /dev/null @@ -1,7 +0,0 @@ -../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc -../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc -../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc -../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc -../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc -../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc -../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt deleted file mode 100644 index 4a0beb250e2241c7523e69b5262cb9ffc977d28d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_first_three.txt +++ /dev/null @@ -1,3 +0,0 @@ -../dnn_sources/src/half/profiling/alexnet2_cifar10_half_profiling.cc -../dnn_sources/src/half/profiling/alexnet_cifar10_half_profiling.cc -../dnn_sources/src/half/profiling/resnet18_cifar10_half_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt deleted file mode 100644 index 20ca95abcf1ee1aab337fa391abb5f1a74583fe1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_remainder.txt +++ /dev/null @@ -1,4 +0,0 @@ -../dnn_sources/src/half/profiling/mobilenet_depthwise_half_profiling.cc -../dnn_sources/src/half/profiling/mobilenet_shallow_depthwise_half_profiling.cc -../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc -../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt deleted file mode 100644 index 506497e42889dc1d8bb2465912e87f56464e7ecc..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp16_sources.txt +++ 
/dev/null @@ -1 +0,0 @@ -../dnn_sources/src/half/lenet_keras_half.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt deleted file mode 100644 index 12b87930416c4269a62f2020a06b42cf5cf4dc13..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32.txt +++ /dev/null @@ -1,9 +0,0 @@ -../dnn_sources/src/profiling/alexnet2_profiling.cc -../dnn_sources/src/profiling/alexnet_cifar10_profiling.cc -../dnn_sources/src/profiling/mobilenet_cifar10_profiling.cc -../dnn_sources/src/profiling/mobilenet_shallow_profiling.cc -../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc -../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc -../dnn_sources/src/profiling/resnet18_cifar10_profiling.cc -../dnn_sources/src/profiling/vgg16_cifar100_profiling.cc -../dnn_sources/src/profiling/vgg16_cifar10_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt deleted file mode 100644 index cd8f03c30712f0162db2cc8bcf563087be05bf64..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_sources.txt +++ /dev/null @@ -1 +0,0 @@ -../dnn_sources/src/lenet_keras.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt deleted file mode 100644 index a59f773cda240a311c0c873c9366494018b87312..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_fp32_test.txt +++ /dev/null @@ -1 +0,0 @@ -../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt deleted file mode 100644 index 2b7382da3570917c1983ad0c3fe02763d8565635..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_mobilenet_depth.txt +++ /dev/null @@ -1,2 +0,0 @@ -../dnn_sources/src/profiling/mobilenet_depthwise_profiling.cc -../dnn_sources/src/profiling/mobilenet_shallow_depthwise_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt deleted file mode 100644 index 32b18d4ca22672be6b44ecb674ea3ad00e18276d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/filenames_one_file.txt +++ /dev/null @@ -1,2 +0,0 @@ -../dnn_sources/src/half/profiling/vgg16_cifar100_half_profiling.cc -../dnn_sources/src/half/profiling/vgg16_cifar10_half_profiling.cc diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt deleted file mode 100644 index 207eb1ed1f45ffde7dad0da4e125aa0ceaa5c5cd..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16.txt +++ /dev/null @@ -1,17 +0,0 @@ -perf,20 1,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,21 1,2,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,22 
1,2,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,23 1,3,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,24 1,3,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,25 1,3,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,26 2,1,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,27 2,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,28 3,1,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,29 3,1,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,30 3,1,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,31 1,1,2,0 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,33 1,1,4,0 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,34 1,1,4,1 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,35 1,1,4,2 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt deleted file mode 100644 index fc76565110cf34ab57024dd852c1a51b23a8f45e..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_knobs_31_36.txt +++ /dev/null @@ -1,6 +0,0 @@ -samp,31 1,1,2,0 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,33 1,1,4,0 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,34 1,1,4,1 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,35 1,1,4,2 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt deleted file mode 100644 index 72c43e61288c532feed94f5768357b3113d5de49..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_old.txt +++ /dev/null @@ -1,18 +0,0 @@ -perf,20 1,1,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf -perf,21 1,2,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf -perf,22 1,2,1 2.25 tensorHalfConvolution tensorConvPerfCudaHalf -perf,23 1,3,0 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -perf,24 1,3,1 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -perf,25 1,3,2 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -perf,26 2,1,0 2.25 tensorHalfConvolution tensorConvPerfCudaHalf -perf,27 2,1,1 2.25 tensorHalfConvolution tensorConvPerfCudaHalf -perf,28 3,1,0 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -perf,29 3,1,1 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -perf,30 3,1,2 1.88 tensorHalfConvolution tensorConvPerfCudaHalf -samp,31 2,0 1.88 tensorHalfConvolution tensorConvInputHalf -samp,32 2,1 1.88 tensorHalfConvolution tensorConvInputHalf -samp,33 4,0 1.88 tensorHalfConvolution tensorConvInputHalf -samp,34 4,1 1.88 tensorHalfConvolution tensorConvInputHalf -samp,35 4,2 1.88 tensorHalfConvolution tensorConvInputHalf -samp,36 4,3 1.88 tensorHalfConvolution tensorConvInputHalf -samp,37 1,1 1.88 tensorHalfConvolution tensorConvInputHalf diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt deleted file mode 100644 index 
0f0593226f6fbeddda91046e7416fe108bfb6d90..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_samp.txt +++ /dev/null @@ -1,7 +0,0 @@ -samp,31 2,0 1.88 tensorHalfConvolution tensorConvInputHalf -samp,32 2,1 1.88 tensorHalfConvolution tensorConvInputHalf -samp,33 4,0 1.88 tensorHalfConvolution tensorConvInputHalf -samp,34 4,1 1.88 tensorHalfConvolution tensorConvInputHalf -samp,35 4,2 1.88 tensorHalfConvolution tensorConvInputHalf -samp,36 4,3 1.88 tensorHalfConvolution tensorConvInputHalf -samp,37 1,1 1.88 tensorHalfConvolution tensorConvInputHalf diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt deleted file mode 100644 index a172a4e515ebfd24a51267da8bac2cb5f13ce6c0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp16_vgg16.txt +++ /dev/null @@ -1,13 +0,0 @@ -perf,20 1,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,21 1,2,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,22 1,2,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,23 1,3,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,24 1,3,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,25 1,3,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,26 2,1,1,0 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,27 2,1,1,1 2.25 tensorHalfConvolution tensorConvApproxHalf -perf,28 3,1,1,0 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,29 3,1,1,1 1.88 tensorHalfConvolution tensorConvApproxHalf -perf,30 3,1,1,2 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,32 1,1,2,1 1.88 tensorHalfConvolution tensorConvApproxHalf -samp,36 1,1,4,3 1.88 tensorHalfConvolution tensorConvApproxHalf diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt deleted file mode 100644 index 78f3e361ee8a96c6520793b435815210e1fc7117..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32.txt +++ /dev/null @@ -1,17 +0,0 @@ -perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox -perf,21 1,2,1,0 2.25 tensorConvolution tensorConvApprox -perf,22 1,2,1,1 2.25 tensorConvolution tensorConvApprox -perf,23 1,3,1,0 1.88 tensorConvolution tensorConvApprox -perf,24 1,3,1,1 1.88 tensorConvolution tensorConvApprox -perf,25 1,3,1,2 1.88 tensorConvolution tensorConvApprox -perf,26 2,1,1,0 2.25 tensorConvolution tensorConvApprox -perf,27 2,1,1,1 2.25 tensorConvolution tensorConvApprox -perf,28 3,1,1,0 1.88 tensorConvolution tensorConvApprox -perf,29 3,1,1,1 1.88 tensorConvolution tensorConvApprox -perf,30 3,1,1,2 1.88 tensorConvolution tensorConvApprox -samp,31 1,1,2,0 1.88 tensorConvolution tensorConvApprox -samp,32 1,1,2,1 1.88 tensorConvolution tensorConvApprox -samp,33 1,1,4,0 1.88 tensorConvolution tensorConvApprox -samp,34 1,1,4,1 1.88 tensorConvolution tensorConvApprox -samp,35 1,1,4,2 1.88 tensorConvolution tensorConvApprox -samp,36 1,1,4,3 1.88 tensorConvolution tensorConvApprox diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt deleted file mode 100644 index df001ba497d0ed440dd34beead33d607651d3f35..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_baseline.txt +++ /dev/null @@ -1 +0,0 @@ -perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt deleted file mode 100644 index 36a7dbca05ef71b6046a91066acf5382f2a5c7a3..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_old.txt +++ /dev/null @@ -1,11 +0,0 @@ -perf,20 1,1,0 2.25 tensorConvolution tensorConvPerfCuda -perf,21 1,2,0 2.25 tensorConvolution tensorConvPerfCuda -perf,22 1,2,1 2.25 tensorConvolution tensorConvPerfCuda -perf,23 1,3,0 1.88 tensorConvolution tensorConvPerfCuda -perf,24 1,3,1 1.88 tensorConvolution tensorConvPerfCuda -perf,25 1,3,2 1.88 tensorConvolution tensorConvPerfCuda -perf,26 2,1,0 2.25 tensorConvolution tensorConvPerfCuda -perf,27 2,1,1 2.25 tensorConvolution tensorConvPerfCuda -perf,28 3,1,0 1.88 tensorConvolution tensorConvPerfCuda -perf,29 3,1,1 1.88 tensorConvolution tensorConvPerfCuda -perf,30 3,1,2 1.88 tensorConvolution tensorConvPerfCuda diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt deleted file mode 100644 index 913397cc4936bf11f3eefa15b5804700865e7b6b..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_to_fp16.txt +++ /dev/null @@ -1 +0,0 @@ -fp16,12 0 1.5 tensorConvolution tensorHalfConvolution diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt deleted file mode 100644 index 6fbab7d7b85255cd86748634faea0bf48ed75e42..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_fp32_vgg16.txt +++ /dev/null @@ -1,13 +0,0 @@ -perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox -perf,21 1,2,1,0 2.25 tensorConvolution tensorConvApprox -perf,22 1,2,1,1 2.25 tensorConvolution tensorConvApprox -perf,23 1,3,1,0 1.88 tensorConvolution tensorConvApprox -perf,24 1,3,1,1 1.88 tensorConvolution tensorConvApprox -perf,25 1,3,1,2 1.88 tensorConvolution tensorConvApprox -perf,26 2,1,1,0 2.25 tensorConvolution tensorConvApprox -perf,27 2,1,1,1 2.25 tensorConvolution tensorConvApprox -perf,28 3,1,1,0 1.88 tensorConvolution tensorConvApprox -perf,29 3,1,1,1 1.88 tensorConvolution tensorConvApprox -perf,30 3,1,1,2 1.88 tensorConvolution tensorConvApprox -samp,32 1,1,2,1 1.88 tensorConvolution tensorConvApprox -samp,36 1,1,4,3 1.88 tensorConvolution tensorConvApprox diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt deleted file mode 100644 index 68686b25de1c607e34d75044cd7ff19cf0c8890a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/knob_config_test.txt +++ /dev/null @@ -1 +0,0 @@ -fp16,12 0 1.5 tensorHalfConvolution tensorHalfConvolution diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py deleted file mode 100644 index 
589cdd0f4fe05cb8e9844ba9ac3dccd73133f09f..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators/source_code_autogenerator.py +++ /dev/null @@ -1,463 +0,0 @@ -# Input: file of the following table format -# id knob configurations (arbitrary # of columns) orig_func_name new_func_name -# Input: file containing list of filenames to generate modified sources for -# Generates: -# a new directory called <original_source_nane>_different_knobs -# files named <original_source_name>_<id>.txt within their respective directories - -import glob -import sys -import os -import re -import shutil - -class Approx: - FP32 = 0 - FP16 = 1 - PERF = 2 - SAMP = 3 - -class KnobConfiguration: - ''' - Stores the configurations as well as other useful information for each knob configuration - Stores: id (may factor out if ids are guaranteed to start at 0/1 and be consecutive) - original function name - modified function name - new function parameters (knobs) - new function call (modified function name(knobs)) - ''' - def __init__(self, raw_config): - ''' - Args: raw_config = line of configuration file to parse - ''' - line_as_lst = raw_config.strip().split() - # approx,<id> knob1,knob2,etc IGNORE old_fun_name new_fun_name - - approx_id_lst = line_as_lst[0].split(',') - assert len(approx_id_lst) == 2 - - self.id = int(approx_id_lst[1]) - - if approx_id_lst[0] == "fp32": - self.approx = Approx.FP32 - self.filename_ext = "fp32_converted" - return # special case - elif approx_id_lst[0] == "fp16": - self.approx = Approx.FP16 - self.filename_ext = "fp16_converted" - return # special case - elif approx_id_lst[0] == "perf": - self.approx = Approx.PERF - elif approx_id_lst[0] == "samp": - self.approx = Approx.SAMP - - self.orig_func_name = line_as_lst[-2] # Second to last element - self.modified_func_name = line_as_lst[-1] # Last element - self.params = line_as_lst[1].split(",") # First element = knob configuration - self.filename_ext = approx_id_lst[0] + "_" + "_".join(self.params) # approx_method_knobs - - - # DEBUG - def __repr__(self): - if self.approx == Approx.FP32: - return "FP32" - elif self.approx == Approx.FP16: - return "FP16" - - approx_type = None - if self.approx == Approx.PERF: - approx_type = "PERF" - elif self.approx == Approx.SAMP: - approx_type = "SAMP" - return "Approx: %s, ID: %d, Orig func nane: %s, Modified func nane: %s, Params: %s" \ - % (approx_type, self.id, self.orig_func_name, self.modified_func_name, \ - ', '.join(self.params)) - - -def get_new_path(old_path, orig_source_code_dir): - ''' - Returns a path that's compatible with the location of the generated source code - - Args: - old_path: Original path of file that's being included - orig_source_code_dir: Path to original source code dir wrt the current dir - ''' - if os.path.isabs(old_path): # Old path works - return old_path - # Adding an extra .. 
because the path should be wrt the generated directory - return os.path.join("..", orig_source_code_dir, old_path) - - -# "complete_line" = a valid line of code -def get_new_function_calls(complete_line, knob_config): - ''' - Returns a copy of an inputted line of code such that all instances of old - function calls are replaced with newFunctionCall(old params, knobs) - - Note: The old calls aren't completely overriden, as we still need the old parameters but - insert new parameters as well - - Args: - complete_line: A complete line of code to process - knob_config: KnobConfiguration object representing current configuration - ''' - orig_func_ind = complete_line.find(knob_config.orig_func_name) - new_line = [] - line_start_ind = 0 - last_ind = 0 - - while orig_func_ind != -1: - new_line.append(complete_line[line_start_ind : orig_func_ind]) - line_start_ind = complete_line.find(")", orig_func_ind) + 1 - - old_func_call = complete_line[complete_line.find("(", orig_func_ind): line_start_ind] - if knob_config.modified_func_name == knob_config.orig_func_name: - # count the number of new parameters - num_repl_params = len(knob_config.params) - old_func_params = old_func_call.strip().split(',') - new_line.append("%s%s, %s)" % (knob_config.modified_func_name, ', '.join(old_func_params[:-num_repl_params]), ', '.join(knob_config.params))) - - else: - new_line.append("%s%s, %s)" % (knob_config.modified_func_name, old_func_call[:-1], ', '.join(knob_config.params))) - orig_func_ind = complete_line.find(knob_config.orig_func_name, line_start_ind) - new_line.append(complete_line[line_start_ind : ]) - return ''.join(new_line) - - -def convert_local_paths(file_contents, orig_source_dir): - ''' - Converts all local paths wrt the original source file's directory to paths compatible - with the current source code directory - - Args: - file_contents: String containing source code read from file - orig_source_dir: Path of original source code dir wrt the current directory - ''' - last_include_ind = file_contents.rfind("#include") - last_include_newline_ind = file_contents.find("\n", last_include_ind) - include_lines = file_contents[ : last_include_newline_ind].split("\n") - - new_file_contents = [] - for line in include_lines: - if line.startswith("#"): - include_file = line.split()[1] - if include_file.startswith("\""): - new_include_path = get_new_path(include_file.replace("\"", ""), orig_source_dir.replace("\"", "")) - new_file_contents.append("#include \"%s\"\n" % new_include_path) - else: - new_file_contents.append(line) - new_file_contents.append(file_contents[last_include_newline_ind : ]) - return '\n'.join(new_file_contents) - - -def get_tensor_operation(line): - tensor_func_call = None - tensor_op_name = None - start_ind = None - end_ind = None - - start_ind = line.find("tensor") - if start_ind != -1: - end_ind = line.find('(') - tensor_op_name = line[start_ind + len("tensor") : end_ind] - if tensor_op_name[0].isupper(): # crude way of checking whether we have a camel cased method - end_ind = line.find('(') - tensor_func_call = line[start_ind : end_ind] - return tensor_func_call, tensor_op_name, start_ind, end_ind - - -def generate_fp32_source(new_file, source_file, orig_source_dir): - # Copy the source code over - new_file_contents = convert_local_paths(source_file.read(), orig_source_dir) - new_file.write(new_file_contents) - - -def generate_fp16_source(knob_config, new_file, source_file, orig_source_dir): - file_contents = convert_local_paths(source_file.read(), orig_source_dir).split('\n') - - 
new_file_contents = [] - for line in file_contents: - tensor_func_call, tensor_op_name, start_ind, end_ind = get_tensor_operation(line) - # tensorHalfSoftmax doesn't exist - if line.find("Softmax") == -1 and tensor_func_call: - new_file_contents.append(line[ : start_ind] + "tensorHalf" + tensor_op_name \ - + line[end_ind : ]) - else: - new_file_contents.append(line) - new_file.write('\n'.join(new_file_contents)) - - -def generate_approx_source(knob_config, new_file, source_file, orig_source_dir): - new_file_contents = [] - - # Store complete line to handle cases where one line of code is split into two lines - complete_line = "" - for line in source_file: - # Replace the current path of the local include with a path that's compatible - # with the location of the generated source code - if line.startswith("#"): - include_file = line.split()[1] - if include_file.startswith("\""): - new_include_path = get_new_path(include_file.replace("\"", ""), orig_source_dir.replace("\"", "")) - new_file_contents.append("#include \"%s\"\n" % new_include_path) - else: - new_file_contents.append(line) - continue - - # Handles case where 1 actual line of code is split into 2 lines - elif line.find("}") != -1 or line.find("{") != -1: - complete_line += line - new_file_contents.append(complete_line) - complete_line = "" - continue - - elif line.find(";") == -1: # Last char is always \n - complete_line += line - continue - - complete_line += line - orig_func_ind = complete_line.find(knob_config.orig_func_name) - if orig_func_ind != -1: - new_file_contents.append(get_new_function_calls(complete_line, knob_config)) - else: - new_file_contents.append(complete_line) - complete_line = "" - new_file.write(''.join(new_file_contents)) - - -def generate_source_code(table, dir_name, filename, source_name, profile_per_tensor): - ''' - Generates source code for all configurations in the table for one original source - Args - table: List of KnobConfigurations - dir_name: Directory new sources should be placed in - filename: Filename of original source - source_name: Filename without the file extension (ex: foo/blah.cc --> blah) - ''' - source_file = open(filename, "r") - orig_source_dir = os.path.dirname(filename) - - for knob_config in table: - source_file.seek(0, 0) - new_filename = os.path.join(dir_name, "%s_%s.cc" % (source_name, knob_config.id)) - #new_filename = os.path.join(dir_name, "%s_%s.cc" % (source_name, knob_config.filename_ext)) - new_file = open(new_filename, "w") - if knob_config.approx == Approx.FP16: - generate_fp16_source(knob_config, new_file, source_file, orig_source_dir) - elif knob_config.approx == Approx.FP32: - generate_fp32_source(new_file, source_file, orig_source_dir) - elif knob_config.approx == Approx.PERF or knob_config.approx == Approx.SAMP: - generate_approx_source(knob_config, new_file, source_file, orig_source_dir) - - new_file.close() # Need to flush - - if profile_per_tensor: - add_profiling_calls_per_tensor(new_filename) - print("Generated source code as %s" % new_filename) - source_file.close() - - -def generate_all_sources(table, orig_files_filename, profile_per_tensor): - ''' - Generates directories and source code for all original sources for all knob configurations - Args: - table: List of KnobConfiguration objects - orig_files_filename: Filename of file containing all original source names to generate new - sources for - ''' - orig_files = open(orig_files_filename, "r") - for orig_filename in orig_files: - orig_filename = orig_filename.strip() - - # Source name = original filename 
without the .cc - last_slash_ind = orig_filename.rfind("/") - file_ext_ind = orig_filename.find(".cc") - if last_slash_ind == -1: - source_name = orig_filename[ : file_ext_ind] - else: - source_name = orig_filename[last_slash_ind + 1 : file_ext_ind] - print("Source name: %s" % source_name) - - # Start with a clean directory - dir_name = "%s_autogenerated_knobs" % source_name - print("Setting up directory: %s" % dir_name) - if os.path.isdir(dir_name): - print("Directory exists: clearing everything") - for old_file in glob.glob(os.path.join(dir_name, "*")): - os.remove(old_file) - - else: - print("Generating directory: %s" % dir_name) - os.makedirs(dir_name) - - generate_source_code(table, dir_name, orig_filename, source_name, profile_per_tensor) - print("\n") - orig_files.close() - - -# This is a lazy approach but it works so ... -def add_profiling_calls_per_tensor(source_filename): - source_file = open(source_filename, "r") - orig_source = source_file.read().split('\n') - source_file.close() - - modified_source = [] - - init_profiler_cmd = "%sProfiler profiler;" - start_profiler_call = "%sprofiler.start_profiler();" - resume_profiler_call = "%sprofiler.resume_profiler();" - pause_profiler_call = "%sprofiler.pause_profiler();" - stop_profiler_call = "%sprofiler.stop_profiler();" - time_energy_profiler_call = "%sauto time_energy_%d = profiler.get_time_energy();" - reset_profiler_call = "%sprofiler.reset();" - - time_var_decl = "%sdouble %s_time = 0.0;" - time_energy_decl = "%sdouble %s_energy = 0.0;" - - time_incr_cmd = "%s%s_time += time_energy_%d.first;" - energy_incr_cmd = "%s%s_energy += time_energy_%d.second;" - - output_per_tensor = "%sstd::cout << \"Operation %s, time: \" << (%s_time) / total_runs <<\", energy: \" << (%s_energy) / total_runs << std::endl; " - total_output = "%sstd::cout << \"Total %s: \" << (%s) / total_runs << std::endl;" - - time_energy_count = 0 - - tensor_operations = set() - for line in orig_source: - line = line.strip() - tensor_func_call, _, _, _ = get_tensor_operation(line) - if tensor_func_call: tensor_operations.add(tensor_func_call) - - inserted_end_profiler_call = False - has_seen_for_loop = False - close_bracket_count = 0 - line_ind = 0 - - while line_ind < len(orig_source): - line = orig_source[line_ind] - num_leading_spaces = len(line) - len(line.lstrip()) - leading_spaces_str = ' '.join(["" for _ in range(num_leading_spaces)]) - - if line.find("for") != -1: - has_seen_for_loop = True - - if has_seen_for_loop and line.find("}") != -1: - close_bracket_count += 1 - - if line.find("#include") != -1: - modified_source.append(leading_spaces_str + line) - line_ind += 1 - continue - - if line.find("profiler") != -1 or line.find("total_time") != -1 or line.find("total_energy") != -1: - line_ind += 1 - continue - - if line.find("int total_runs") != -1: - # Now we insert the counters - for op_name in tensor_operations: - modified_source.append(time_var_decl % (leading_spaces_str, op_name)) - modified_source.append(time_energy_decl % (leading_spaces_str, op_name)) - modified_source.append(line) - modified_source.append(init_profiler_cmd % leading_spaces_str) - modified_source.append(start_profiler_call % leading_spaces_str) - line_ind += 1 - continue - - if close_bracket_count == 2 and not inserted_end_profiler_call: # NOTE this breaks if there are helper methods/scopes - modified_source.append(line) - total_time_str = [] - total_energy_str = [] - - for op_name in tensor_operations: - modified_source.append(output_per_tensor % (leading_spaces_str, op_name, 
op_name, op_name)) - total_time_str.append("%s_time" % op_name) - total_energy_str.append("%s_energy" % op_name) - - modified_source.append(total_output % (leading_spaces_str, "time", ' + '.join(total_time_str))) - modified_source.append(total_output % (leading_spaces_str, "energy", ' + '.join(total_energy_str))) - - modified_source.append(stop_profiler_call % leading_spaces_str) - line_ind += 1 - inserted_end_profiler_call = True - continue - - tensor_ind = line.find("tensor") - if tensor_ind == -1: - modified_source.append(line) - line_ind += 1 - continue - - word_after_tensor = line[tensor_ind + len("tensor")] - if word_after_tensor[0].isupper(): # crude way of checking whether we have a camel cased method - tensor_op = line[tensor_ind : line.find('(')] - - modified_source.append(resume_profiler_call % leading_spaces_str) - modified_source.append(line) - - # Address one line that's split up into 2 lines for readability - if line.find(")") == -1 and line_ind + 1 < len(orig_source) \ - and orig_source[line_ind + 1].find(")") != -1: - line_ind += 1 - modified_source.append(orig_source[line_ind]) - - modified_source.append(pause_profiler_call % leading_spaces_str) - modified_source.append(time_energy_profiler_call % (leading_spaces_str, time_energy_count)) - modified_source.append(time_incr_cmd % (leading_spaces_str, tensor_op, time_energy_count)) - modified_source.append(energy_incr_cmd % (leading_spaces_str, tensor_op, time_energy_count)) - modified_source.append(reset_profiler_call % leading_spaces_str) - modified_source.append("") - - time_energy_count += 1 - else: - modified_source.append(line) - line_ind += 1 - - source_file = open(source_filename, "w") - source_file.write('\n'.join(modified_source)) - source_file.close() - - -def parse_table(table_filename): - ''' - Given the filename of a table, parses the table into a list of KnobConfigurations - ''' - # Can we assume that the ids always start at 1 --> if so, can index by knobs - # else: need to use a dict - table = [] - table_file = open(table_filename, "r") - for raw_config in table_file: - table.append(KnobConfiguration(raw_config)) - table_file.close() - return table - - -def delete_autogenerated_dirs(): - for dir_name in os.listdir("."): - if dir_name.endswith("profiling_autogenerated_knobs"): - print("DELETING %s" % dir_name) - shutil.rmtree(dir_name) - print("DONE") - -if __name__ == "__main__": - num_args = len(sys.argv) - - if num_args == 2 and sys.argv[1] == "clean": - delete_autogenerated_dirs() - exit(0) - - if num_args != 3 and num_args != 4: - print("Usage: python source_code_autogenerator.py <table file> <original filenames file> [per_tensor]") - print("To delete autogen dirs: python source_code_autogenerator.py clean") - - if num_args >= 2 and sys.argv[1] == "--usage": - print("Table file format: <id> <knob configurations separated by spaces> <orig func name> <new func name>") - print("Original filenames file: <original_filename><newline> etc") - else: - print("Run with --usage flag for more detailed information") - exit(1) - - profile_per_tensor = num_args == 4 and sys.argv[3] == "per_tensor" - - table = parse_table(sys.argv[1]) - generate_all_sources(table, sys.argv[2], profile_per_tensor) diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp deleted file mode 100644 index 
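
Note: the knob_config_*.txt tables deleted above use the line format documented at the top of source_code_autogenerator.py (approx,id knobs speedup orig_func new_func). Below is a minimal sketch, not part of the deleted script, of how one such line splits into the fields that KnobConfiguration keeps for the perf/samp case; the speedup column (e.g. 2.25) is ignored, per the script's own comment.

def parse_knob_line(raw_config):
    # Mirrors KnobConfiguration.__init__ for the perf/samp case.
    fields = raw_config.strip().split()
    approx, knob_id = fields[0].split(",")    # e.g. "perf", "20"
    params = fields[1].split(",")             # knob values appended to the generated call
    # fields[2] (the speedup estimate) is ignored, per the script's header comment.
    orig_func_name = fields[-2]               # call that gets replaced
    modified_func_name = fields[-1]           # replacement call
    return approx, int(knob_id), params, orig_func_name, modified_func_name

# Example line taken from the deleted knob_config_fp32.txt:
print(parse_knob_line("perf,20 1,1,1,1 2.25 tensorConvolution tensorConvApprox"))
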
f7da755535f3b31c86e4c82801458a02930dc02a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../../dnn_sources/include/utils.h" -#include <vector> - -extern void llvm_hpvm_initTensorRt(int gpuid); - -extern void llvm_hpvm_clearRuntimeController(); - -const size_t batch_size = 500; - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k"; - - llvm_hpvm_initTensorRt(0); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - - void* dataset = batch; - float sigma = 1.4; - size_t w = 5; - size_t h = 5; - size_t n_chan = 1; - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - Tensor* gaussian = (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); - Tensor *kernel_x, *kernel_y; - - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - std::tie(kernel_x, kernel_y) = std::make_pair(t1, t2); - - // 0. Grayscale - auto *summed_image = tensorReduce(dataset, 1, MathOp::Add, 0.0f); - auto *grayscale_image = tensorMap1(MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = - tensorConvApprox(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0); - // 2. Get edge gradient / direction - auto *grad_x = - tensorConvApprox(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); - auto *grad_y = - tensorConvApprox(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); - auto *grad_mag = tensorMap2(MathOp::Hypot, grad_x, grad_y); - // 2.5. 
Normalize grad magnitude - auto *grad_max_1D = tensorReduce(grad_mag, 2, MathOp::Max, 0.0f); - auto *grad_max = tensorReduce(grad_max_1D, 3, MathOp::Max, 0.0f); - auto *grad_mag_norm = tensorMap2(MathOp::Div, grad_mag, grad_max); - auto* result = grad_mag_norm; - - bstart += batch_size; - freeBatchMemory(); - } - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc b/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc deleted file mode 100644 index 18462e2c9afe983d34ceb461ed306078f8a50771..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/legacy/code_autogenerators_img/canny_profiling_final/canny_profiling_half.cc +++ /dev/null @@ -1,73 +0,0 @@ -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../../dnn_sources/include/utils.h" -#include <vector> - -extern void llvm_hpvm_initTensorRt(int gpuid); - -extern void llvm_hpvm_clearRuntimeController(); - -const size_t batch_size = 500; - -int main(int argc, char *argv[]) { - const char *input1_path = "../model_params/image_processing_5k"; - - llvm_hpvm_initTensorRt(0); - size_t bstart = 0; - startMemTracking(); - while (true) { - Tensor *batch = readDataSet(input1_path, bstart, batch_size); - if (batch == nullptr) // If end of dataset - break; - - void* dataset = batch; - float sigma = 1.4; - size_t w = 5; - size_t h = 5; - size_t n_chan = 1; - int64_t m = (w - 1) / 2, n = (h - 1) / 2; - auto *data = new float[w * h]; - float sum = 0.0f; - for (int64_t i = -m; i <= m; i++) - for (int64_t j = -n; j <= n; j++) { - size_t idx = (i + m) * h + (j + n); - float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); - data[idx] = exp(exponent); - sum += data[idx]; - } - if (sum != 0.0f) - for (size_t i = 0; i < w * h; i++) - data[i] /= sum; - Tensor* gaussian = (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); - Tensor *kernel_x, *kernel_y; - - std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); - std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); - auto *t1 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); - auto *t2 = - (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); - std::tie(kernel_x, kernel_y) = std::make_pair(t1, t2); - - // 0. Grayscale - auto *summed_image = tensorReduceHalf(dataset, 1, MathOp::Add, 0.0f); - auto *grayscale_image = tensorMap1Half(MathOp::Avg3, summed_image); - // 1. Denoise - auto *image2 = - tensorConvApproxHalf(grayscale_image, gaussian, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0); - // 2. Get edge gradient / direction - auto *grad_x = - tensorConvApproxHalf(image2, kernel_x, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); - auto *grad_y = - tensorConvApproxHalf(image2, kernel_y, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); - auto *grad_mag = tensorMap2Half(MathOp::Hypot, grad_x, grad_y); - // 2.5. 
Normalize grad magnitude - auto *grad_max_1D = tensorReduceHalf(grad_mag, 2, MathOp::Max, 0.0f); - auto *grad_max = tensorReduceHalf(grad_max_1D, 3, MathOp::Max, 0.0f); - auto *grad_mag_norm = tensorMap2Half(MathOp::Div, grad_mag, grad_max); - auto* result = grad_mag_norm; - - bstart += batch_size; - freeBatchMemory(); - } - llvm_hpvm_clearRuntimeController(); -} diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu deleted file mode 100644 index 25432c4e3283bfd6062adcef1fcfe7326fc8737d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/approx_techniques_back.cu +++ /dev/null @@ -1,862 +0,0 @@ - - -#include "tensor_utils.h" -#include "fp16_gemm.h" -#include "debug.h" -#include "global_data.h" -#include "profiling.h" - - -extern "C"{ - - - -__global__ -void depthwise_conv(float* const __restrict__ y, -const float* const __restrict__ x, -const float* const __restrict__ w, -const int B, const int M, -const int H, const int W, const int KH, -const int KW, const int H_out, const int W_out, -const int H_pad, const int W_pad, -const int H_stride, const int W_stride, const int start_batch) -{ - -#define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] -#define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - -const int num = 1; - -const int b = num * blockIdx.x + start_batch; -const int m = blockIdx.y; //current filter/channel - -const int tx = threadIdx.x; - -const int start_h = (threadIdx.x / W_out) * H_stride - H_pad; -const int start_w = (threadIdx.x % W_out) * W_stride - W_pad; - -float C[num] = { 0 }; - -const float* weights = &w[m * KH * KW]; - -for (int k = 0; k < KH * KW; k++) { -int p = k / KW; -int q = k % KW; - -#pragma unroll -for (int i = 0; i < num; i++) { -if (start_h + p > -1 && start_h + p < H && -start_w + q > -1 && start_w + q < W) { - -C[i] += x4d(b + i, m, start_h + p, start_w + q) * weights[k]; -} - -} -} - -#pragma unroll -for (int i = 0; i < num; i++) { -if(b + i < B) -y4d(b + i, m, 0, tx) = C[i]; - -} - - -#undef y4d -#undef x4d -} - - -__global__ -void depthwise_convNew(float* const __restrict__ y, -const float* const __restrict__ x, -const float* const __restrict__ w, -const int B, const int M, -const int H, const int W, const int KH, -const int KW, const int H_out, const int W_out, -const int H_pad, const int W_pad, -const int H_stride, const int W_stride) -{ - -#define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] -#define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - -const int num = 12; - -const int b = num * blockIdx.x; -const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - -const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - -const int start_h = (tx / W_out) * H_stride - H_pad; -const int start_w = (tx % W_out) * W_stride - W_pad; - -float C[num] = { 0 }; - -const float* weights = &w[m * KH * KW]; - -for (int k = 0; k < KH * KW; k++) { -int p = k / KW; -int q = k % KW; - -if (start_h + p > -1 && start_h + p < H && -start_w + q > -1 && start_w + q < W) { - -#pragma unroll -for (int i = 0; i < num; i++) { -if(b + i < B) -C[i] += x4d(b + i, m, start_h + p, start_w + q) * weights[k]; -} - -} -} - -#pragma unroll -for (int i = 0; i < num; i++) { -if(b + i < B) -y4d(b + i, m, 0, tx) = C[i]; - -} 
- -#undef y4d -#undef x4d -} - - - - -__global__ void depthwise_convNew8_half(__half* const __restrict__ y, - const __half* const __restrict__ x, - const __half* const __restrict__ w, - const int B, const int M, - const int H, const int W, const int KH, - const int KW, const int H_out, const int W_out, - const int H_pad, const int W_pad, - const int H_stride, const int W_stride) -{ - - #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] - #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - - const int num = 8; - - const int b = num * blockIdx.x; - const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - - if(m < M){ - const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - - const int start_h = (tx / W_out) * H_stride - H_pad; - const int start_w = (tx % W_out) * W_stride - W_pad; - - __half c0 = 0; - __half c1 = 0; - __half c2 = 0; - __half c3 = 0; - __half c4 = 0; - __half c5 = 0; - __half c6 = 0; - __half c7 = 0; - - const __half* weights = &w[m * KH * KW]; - - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0); - if(b + 1 < B) - c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1); - if(b + 2 < B) - c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2); - if(b + 3 < B) - c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3); - if(b + 4 < B) - c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4); - if(b + 5 < B) - c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5); - if(b + 6 < B) - c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6); - if(b + 7 < B) - c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + q), weights[k], c7); - - - } - } - - y4d(b, m, 0, tx) = c0; - if(b + 1 < B) - y4d(b + 1, m, 0, tx) = c1; - if(b + 2 < B) - y4d(b + 2, m, 0, tx) = c2; - if(b + 3 < B) - y4d(b + 3, m, 0, tx) = c3; - if(b + 4 < B) - y4d(b + 4, m, 0, tx) = c4; - if(b + 5 < B) - y4d(b + 5, m, 0, tx) = c5; - if(b + 6 < B) - y4d(b + 6, m, 0, tx) = c6; - if(b + 7 < B) - y4d(b + 7, m, 0, tx) = c7; - } - - #undef y4d - #undef x4d -} - -__global__ void depthwise_convNew8_half1(__half* const __restrict__ y, - const __half* const __restrict__ x, - const __half* const __restrict__ w, - const int B, const int M, - const int H, const int W, const int KH, - const int KW, const int H_out, const int W_out, - const int H_pad, const int W_pad, - const int H_stride, const int W_stride) -{ - - #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] - #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - - const int num = 8; - - const int b = num * blockIdx.x; - const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - - if(m < M){ - const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - - const int start_h = (tx / W_out) * H_stride - H_pad; - const int start_w = (tx % W_out) * W_stride - W_pad; - - __half c0 = 0; - __half c1 = 0; - __half c2 = 0; - __half c3 = 0; - __half c4 = 0; - __half c5 = 0; - __half c6 = 0; - __half c7 = 0; - - const __half* weights = &w[m * KH * KW]; - - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && 
start_w + q < W) { - - c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0); - } - } - - if(b + 1 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1); - } - } - } - - if(b + 2 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2); - } - } - } - - if(b + 3 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3); - } - } - } - - if(b + 4 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4); - } - } - } - - if(b + 5 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5); - } - } - } - - if(b + 6 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6); - } - } - } - - if(b + 7 < B){ - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + q), weights[k], c7); - } - } - } - - - - y4d(b, m, 0, tx) = c0; - if(b + 1 < B) - y4d(b + 1, m, 0, tx) = c1; - if(b + 2 < B) - y4d(b + 2, m, 0, tx) = c2; - if(b + 3 < B) - y4d(b + 3, m, 0, tx) = c3; - if(b + 4 < B) - y4d(b + 4, m, 0, tx) = c4; - if(b + 5 < B) - y4d(b + 5, m, 0, tx) = c5; - if(b + 6 < B) - y4d(b + 6, m, 0, tx) = c6; - if(b + 7 < B) - y4d(b + 7, m, 0, tx) = c7; - } - - #undef y4d - #undef x4d -} - - - - - - - - -__global__ void depthwise_convNew12(float* const __restrict__ y, - const float* const __restrict__ x, - const float* const __restrict__ w, - const int B, const int M, - const int H, const int W, const int KH, - const int KW, const int H_out, const int W_out, - const int H_pad, const int W_pad, - const int H_stride, const int W_stride) -{ - - #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] - #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - - const int num = 12; - - const int b = num * blockIdx.x; - const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - - if(m < M){ - const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - - const int start_h = (tx / W_out) * H_stride - H_pad; - const int start_w = (tx % W_out) * W_stride - W_pad; - - float c0 = 0; - float c1 = 0; - float c2 = 0; - float c3 = 0; - float c4 = 0; - float c5 = 0; - float c6 = 0; - float c7 = 0; - float c8 = 0; - float c9 = 0; - float c10 = 0; - float c11 = 0; - - const float* weights = &w[m * KH * KW]; - - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - 
- if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c0 += x4d(b, m, start_h + p, start_w + q) * weights[k]; - if(b + 1 < B) - c1 += x4d(b + 1, m, start_h + p, start_w + q) * weights[k]; - if(b + 2 < B) - c2 += x4d(b + 2, m, start_h + p, start_w + q) * weights[k]; - if(b + 3 < B) - c3 += x4d(b + 3, m, start_h + p, start_w + q) * weights[k]; - if(b + 4 < B) - c4 += x4d(b + 4, m, start_h + p, start_w + q) * weights[k]; - if(b + 5 < B) - c5 += x4d(b + 5, m, start_h + p, start_w + q) * weights[k]; - if(b + 6 < B) - c6 += x4d(b + 6, m, start_h + p, start_w + q) * weights[k]; - if(b + 7 < B) - c7 += x4d(b + 7, m, start_h + p, start_w + q) * weights[k]; - if(b + 8 < B) - c8 += x4d(b + 8, m, start_h + p, start_w + q) * weights[k]; - if(b + 9 < B) - c9 += x4d(b + 9, m, start_h + p, start_w + q) * weights[k]; - if(b + 10 < B) - c10 += x4d(b + 10, m, start_h + p, start_w + q) * weights[k]; - if(b + 11 < B) - c11 += x4d(b + 11, m, start_h + p, start_w + q) * weights[k]; - - - } - } - - y4d(b, m, 0, tx) = c0; - if(b + 1 < B) - y4d(b + 1, m, 0, tx) = c1; - if(b + 2 < B) - y4d(b + 2, m, 0, tx) = c2; - if(b + 3 < B) - y4d(b + 3, m, 0, tx) = c3; - if(b + 4 < B) - y4d(b + 4, m, 0, tx) = c4; - if(b + 5 < B) - y4d(b + 5, m, 0, tx) = c5; - if(b + 6 < B) - y4d(b + 6, m, 0, tx) = c6; - if(b + 7 < B) - y4d(b + 7, m, 0, tx) = c7; - if(b + 8 < B) - y4d(b + 8, m, 0, tx) = c8; - if(b + 9 < B) - y4d(b + 9, m, 0, tx) = c9; - if(b + 10 < B) - y4d(b + 10, m, 0, tx) = c10; - if(b + 11 < B) - y4d(b + 11, m, 0, tx) = c11; - - } - - #undef y4d - #undef x4d -} - - -__global__ void depthwise_convNew12_half(__half* const __restrict__ y, - const __half* const __restrict__ x, - const __half* const __restrict__ w, - const int B, const int M, - const int H, const int W, const int KH, - const int KW, const int H_out, const int W_out, - const int H_pad, const int W_pad, - const int H_stride, const int W_stride) -{ - - #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] - #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - - const int num = 12; - - const int b = num * blockIdx.x; - const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - - if(m < M){ - const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - - const int start_h = (tx / W_out) * H_stride - H_pad; - const int start_w = (tx % W_out) * W_stride - W_pad; - - __half c0 = 0; - __half c1 = 0; - __half c2 = 0; - __half c3 = 0; - __half c4 = 0; - __half c5 = 0; - __half c6 = 0; - __half c7 = 0; - __half c8 = 0; - __half c9 = 0; - __half c10 = 0; - __half c11 = 0; - - const __half* weights = &w[m * KH * KW]; - - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - c0 = __hfma(x4d(b, m, start_h + p, start_w + q), weights[k], c0); - if(b + 1 < B) - c1 = __hfma(x4d(b + 1, m, start_h + p, start_w + q), weights[k], c1); - if(b + 2 < B) - c2 = __hfma(x4d(b + 2, m, start_h + p, start_w + q), weights[k], c2); - if(b + 3 < B) - c3 = __hfma(x4d(b + 3, m, start_h + p, start_w + q), weights[k], c3); - if(b + 4 < B) - c4 = __hfma(x4d(b + 4, m, start_h + p, start_w + q), weights[k], c4); - if(b + 5 < B) - c5 = __hfma(x4d(b + 5, m, start_h + p, start_w + q), weights[k], c5); - if(b + 6 < B) - c6 = __hfma(x4d(b + 6, m, start_h + p, start_w + q), weights[k], c6); - if(b + 7 < B) - c7 = __hfma(x4d(b + 7, m, start_h + p, start_w + 
q), weights[k], c7); - if(b + 8 < B) - c8 = __hfma(x4d(b + 8, m, start_h + p, start_w + q), weights[k], c8); - if(b + 9 < B) - c9 = __hfma(x4d(b + 9, m, start_h + p, start_w + q), weights[k], c9); - if(b + 10 < B) - c10 = __hfma(x4d(b + 10, m, start_h + p, start_w + q), weights[k], c10); - if(b + 11 < B) - c11 = __hfma(x4d(b + 11, m, start_h + p, start_w + q), weights[k], c11); - - - } - } - - y4d(b, m, 0, tx) = c0; - if(b + 1 < B) - y4d(b + 1, m, 0, tx) = c1; - if(b + 2 < B) - y4d(b + 2, m, 0, tx) = c2; - if(b + 3 < B) - y4d(b + 3, m, 0, tx) = c3; - if(b + 4 < B) - y4d(b + 4, m, 0, tx) = c4; - if(b + 5 < B) - y4d(b + 5, m, 0, tx) = c5; - if(b + 6 < B) - y4d(b + 6, m, 0, tx) = c6; - if(b + 7 < B) - y4d(b + 7, m, 0, tx) = c7; - if(b + 8 < B) - y4d(b + 8, m, 0, tx) = c8; - if(b + 9 < B) - y4d(b + 9, m, 0, tx) = c9; - if(b + 10 < B) - y4d(b + 10, m, 0, tx) = c10; - if(b + 11 < B) - y4d(b + 11, m, 0, tx) = c11; - - } - - #undef y4d - #undef x4d -} - - - - - -__global__ void depthwise_convNew4_half2(__half* const __restrict__ y, - const __half* const __restrict__ x, - const __half* const __restrict__ w, - const int B, const int M, - const int H, const int W, const int KH, - const int KW, const int H_out, const int W_out, - const int H_pad, const int W_pad, - const int H_stride, const int W_stride) -{ - - #define y4d(i3, i2, i1, i0) y[(i3) * (M * H_out * W_out) + (i2) * (H_out * W_out) + (i1) * (W_out) + i0] - #define x4d(i3, i2, i1, i0) x[(i3) * (M * H * W) + (i2) * (H * W) + (i1) * (W) + i0] - - const int num = 4; - - const int b = num * blockIdx.x; - const int m = (blockIdx.y * blockDim.x + threadIdx.x)/ (H_out * W_out); - - if(m < M){ - const int tx = (blockIdx.y * blockDim.x + threadIdx.x) % (H_out * W_out); - - const int start_h = (tx / W_out) * H_stride - H_pad; - const int start_w = (tx % W_out) * W_stride - W_pad; - - __half2 c0 = __half2half2(0); - __half2 c1 = __half2half2(0); - - const __half* weights = &w[m * KH * KW]; - - for (int k = 0; k < KH * KW; k++) { - int p = k / KW; - int q = k % KW; - if (start_h + p > -1 && start_h + p < H && - start_w + q > -1 && start_w + q < W) { - - - __half2 t1; - __half2 t2; - if(b + 3 < B){ - t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q)); - t2 = __halves2half2(x4d(b + 3, m, start_h + p, start_w + q), x4d(b + 2, m, start_h + p, start_w + q)); - } - else if(b + 2 < B){ - t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q)); - t2 = __halves2half2(0, x4d(b + 2, m, start_h + p, start_w + q)); - - } - else if(b + 1 < B){ - t1 = __halves2half2(x4d(b + 1, m, start_h + p, start_w + q), x4d(b, m, start_h + p, start_w + q)); - } - else{ - t1 = __halves2half2(0, x4d(b, m, start_h + p, start_w + q)); - - } - - - c0 = __hfma2(t1, __halves2half2(weights[k], weights[k]), c0); - c1 = __hfma2(t2, __halves2half2(weights[k], weights[k]), c1); - - } - } - - y4d(b, m, 0, tx) = __high2half(c0); - if(b + 1 < B) - y4d(b + 1, m, 0, tx) = __low2half(c0); - if(b + 2 < B) - y4d(b + 2, m, 0, tx) = __high2half(c1); - if(b + 3 < B) - y4d(b + 3, m, 0, tx) = __low2half(c1); - - } - - #undef y4d - #undef x4d -} - - - - - - -// Perforated Tensor Conv with 'perforation_rate' parameter -void* tensorConvPerf(void* input_ptr, void* filter_ptr, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int conv_groups, int row, int col){ - - INFO("*** TensorConvolution \n"); - profileEvent("tensorConv"); - - Tensor* input = (Tensor*) input_ptr; - 
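// NOTE (clarifying comment, not in the original file): tensorConvPerf perforates the
// convolution by adding `row`/`col` to the vertical/horizontal strides, so fewer output
// rows/columns are actually computed; the interpolateRow/interpolateCol/interpolateXRow/
// interpolateXCol kernels launched at the end of this function then fill in the skipped
// positions of the full-size output tensor.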
Tensor* filter = (Tensor*) filter_ptr; - - cudnnConvolutionDescriptor_t convDesc; - cudnnConvolutionFwdAlgo_t convAlgo; - cudnnConvolutionMode_t mode; - if(conv_mode == 0) - mode = CUDNN_CONVOLUTION; - else if(conv_mode == 1) - mode = CUDNN_CROSS_CORRELATION; - - // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; - - // TODO: Support other cases; - hostToDeviceCopy(input); - hostToDeviceCopy(filter); - - INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride); - - checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); - - //FIXME: Current hack to preserve backward compatibilty - if(conv_groups == 0){ - conv_groups = 1; - } - - // NOTE: Adding support for grouped convolution - checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups)); - - int new_v = vertical_stride + row; - int new_h = horizontal_stride + col; - cudnnDataType_t computeType = CUDNN_DATA_FLOAT; - // FIXIT: Think if upscaling values need to be configurable? - // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used? - checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc, - vertical_pad, horizontal_pad, // conv padding - new_v, new_h, // conv strides - 1, 1, // upscaling values - mode , // mode is configurable - computeType)); // defines compute precision - - int n, c, h, w; // output dimensions - // Find dimension of convolution output - checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc, - input->tensor_desc, - filter->filter_desc, - &n, &c, &h, &w)); - - - DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w); - - Tensor* output; - if(input->data_format == CUDNN_TENSOR_NCHW) - output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type, - CUDNN_TENSOR_NCHW, n, c, h, w); - else if(input->data_format == CUDNN_TENSOR_NHWC){ - DEBUG("* NHWC Format \n"); - output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type, - CUDNN_TENSOR_NHWC, n, h, w, c); - } - else - ERROR("Unsupported Tensor Type"); - - // NOTE: Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - // NOTE: Necessary to insert the above call for every output tensor - - DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %d, C = %d, H = %d, W = %d \n", - output->data_type, output->data_format, output->dims.dim_sizes[0], output->dims.dim_sizes[1], - output->dims.dim_sizes[2], output->dims.dim_sizes[3]); - - if(convDesc == NULL || input->tensor_desc == NULL || - filter->filter_desc == NULL || output->tensor_desc == NULL) - ERROR("NULL descriptor! 
\n"); - - - // Debugging info prints - printTensorDescInfo(input); - printTensorDescInfo(filter); - printTensorDescInfo(output); - - // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking - checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle, - input->tensor_desc, - filter->filter_desc, - convDesc, - output->tensor_desc, - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, - //CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, - 0, - &convAlgo)); - - - DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", convAlgo, - CUDNN_CONVOLUTION_FWD_ALGO_FFT, CUDNN_CONVOLUTION_FWD_ALGO_GEMM, - CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD); - - - // FIXIT: Algo shouldn't be hardcoded - convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - - size_t workspace_size; - checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle, - input->tensor_desc, - filter->filter_desc, - convDesc, - output->tensor_desc, - convAlgo, - &workspace_size)); - - // Allocating memory for the convolution workspace - void* workspace; - checkCudaErrors(cudaMalloc(&workspace, workspace_size)); - DEBUG("workspace size = %d \n", workspace_size); - - - checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc, - input->gpu_data, filter->filter_desc, filter->gpu_data, - convDesc, convAlgo, workspace, workspace_size, - &beta, output->tensor_desc, output->gpu_data)); - - - int old_w = w; - int old_h = h; - h = (2 * vertical_pad + input->dims.dim_sizes[2] - filter->dims.dim_sizes[2]) / vertical_stride + 1; - w = (2 * horizontal_pad + input->dims.dim_sizes[3] - filter->dims.dim_sizes[3]) / horizontal_stride + 1; - - Tensor* new_output; - if(input->data_format == CUDNN_TENSOR_NCHW) - new_output = (Tensor*) create4DTensor((cudnnDataType_t) float_type, //input->data_type, - CUDNN_TENSOR_NCHW, n, c, h, w); - else if(input->data_format == CUDNN_TENSOR_NHWC){ - DEBUG("* NHWC Format \n"); - new_output = (Tensor*) create4DTensor((cudnnDataType_t) input->data_type, - CUDNN_TENSOR_NHWC, n, h, w, c); - } - else - ERROR("Unsupported Tensor Type"); - - - int numBlocks = (n * c * h * w + 127) / 128; - if(vertical_stride == 0 && row == 0) - return output; - - if(vertical_stride == 1 && row == 1){ - interpolateRow<<<numBlocks,128>>>(n * c * h * w, old_h, n, c, h, w, - (float *)output->gpu_data, (float *)new_output->gpu_data); - } - else if(horizontal_stride == 1 && col == 1){ - interpolateCol<<<numBlocks,128>>>(n * c * h * w, old_w, n, c, h, w, - (float *)output->gpu_data, (float *)new_output->gpu_data); - } - else if (col > 0){ - interpolateXCol<<<numBlocks,128>>>(n * c * h * w, old_w, n, c, h, w, - (float *)output->gpu_data, (float *)new_output->gpu_data, col + 1); - } - else{ - interpolateXRow<<<numBlocks,128>>>(n * c * h * w, old_h, n, c, h, w, - (float *)output->gpu_data, (float *)new_output->gpu_data, row + 1); - } - - - cudaDeviceSynchronize(); - - profileEvent("tensorConv_end", true); - - - changeTensorPlacement(new_output, DEVICE); - return new_output; - -} - - - - - - - - -} - diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu deleted file mode 100644 index 5c6f0369384fd580f32ab2771a988e840a33076a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/tensor_runtime.cu +++ /dev/null @@ -1,2121 +0,0 @@ -/* This file includes the API implementation of the HPVM tensor runtime built on cublas, cudnn -** -** Author: Hashim Sharif -** Email: 
hsharif3@illinois.edu -*/ - -#include <stdio.h> -#include <stdarg.h> -#include <cstdio> -#include <cstdlib> -#include <cmath> -#include <ctime> -#include <cfloat> -#include <algorithm> -#include <chrono> -#include <iomanip> -#include <iostream> -#include <map> -#include <memory> -#include <random> -#include <sstream> -#include <string> -#include <vector> - -#include <cuda_runtime.h> -#include <device_launch_parameters.h> - -#include <cublas_v2.h> -#include <cudnn.h> -#include <cublas_api.h> -#include <cuda_fp16.h> -#include <driver_types.h> - - -// Tensor runtime header files -#include "tensor_runtime.h" -#include "tensor_utils.h" -#include "debug.h" -#include "profiling.h" -#include "fp16_conversion.h" -#include "global_data.h" -#include "error.h" -#include "tensor.h" -#include "op_overheads.h" -#include "half_precision_api.h" -#include "hpvm-rt-controller.h" -#include "approxhpvm_runtime_utils.h" -#include "approx_api.h" - - -// Image tensor runtime implementation -#include "img_tensor_runtime.cu" - -//** Potential Improvements: -// 1) Add support for dataypes beyond floats and half -// 2) Support for more CUDNN operations - - - -void llvm_hpvm_initTensorRt(int gpuid){ - - if(!runtime_initialized){ - - printf("INITIALIZING GPU %d \n", gpuid); - // NOTE: Setting the target GPU. Can we use multiple GPUs? - checkCudaErrors(cudaSetDevice(gpuid)); - // Initializing cuDNN and cuBlas handles - checkCudaErrors(cublasCreate(&cublasHandle)); - checkCUDNN(cudnnCreate(&cudnnHandle)); - - -#ifdef PROMISE_TUNER_ENABLED - // readOpenTunerFlags("opentuner_flags"); - readOpenTunerFlags("promise_flags"); -#endif - - -#ifdef ERROR_INJECTION_ENABLED - readOpenTunerFlags("opentuner_flags"); -#endif - - - runtime_initialized = true; - } - -} - - -void llvm_hpvm_cleanupTensorRt(){ - DEBUG("\**** llvm_hpvm_cleanupTensorRt ***\n"); - dumpAccuracyNorms(); -} - - -void llvm_hpvm_initApproxhpvmRt(int gpuid){ - llvm_hpvm_initTensorRt(gpuid); - approxhpvm_runtime_mode = true; -} - -void llvm_hpvm_cleanupApproxhpvmRt(){ - -} - - - -void dumpAccuracyNorms(){ - - #ifdef ERROR_INJECTION_ENABLED - - - #endif - - dump_result("accuracy_summary"); - -} - - -// Returns the number of GPUs active on the platform -int getGPUCount(){ - int num_gpus; - checkCudaErrors(cudaGetDeviceCount(&num_gpus)); - return num_gpus; -} - - - -void clearTensorMap(){ - - tensors_ptr.clear(); - host_ptr.clear(); - obj_ptr.clear(); -} - - -void startMemTracking(){ - - tensors_ptr.clear(); - host_ptr.clear(); - obj_ptr.clear(); - - tracked_tensors.clear(); -} - - -void freeOutputTensors(){ - - DEBUG("**** Freeing Ouput Tensors *** \n"); - for(int i = 0; i < tensors_ptr.size(); i++){ - cudaFree(tensors_ptr[i]); - tensors_ptr[i] = NULL; - } - - for(int i = 0; i < host_ptr.size(); i++){ - free(host_ptr[i]); - host_ptr[i] = NULL; - } - - for(int i = 0; i < obj_ptr.size(); i++){ - free(obj_ptr[i]); - obj_ptr[i] = NULL; - } -} - - - -void clearOpCounter(){ - total_ops = 0; - op_counter = 0; - op_accuracies.clear(); -} - - - -void freeBatchMemory(){ - // Free allocated memory for the current mini-batch - freeOutputTensors(); - // Reinitialize couter for OpenTuner flags - next mini-batch of execution - op_counter = 0; - // Clearing profiling data map - func_counters.clear(); -} - - - - -// FIXIT: Fix any assumptions on the NCHW format -// TODO: benchmark split performance and check if it is prohibitively high? 
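For reference, a minimal sketch (not part of the deleted sources) of how the memory-tracking calls defined above are typically driven: startMemTracking() runs once before the mini-batch loop and freeBatchMemory() once per iteration, mirroring the canny_profiling.cpp example earlier in this diff. The header paths, dataset path, and batch size below are illustrative placeholders taken from that example.

    #include "tensor_runtime.h"   // as in canny_profiling.cpp (include paths abbreviated)
    #include "utils.h"            // provides readDataSet() in that example

    int main() {
      llvm_hpvm_initTensorRt(0);              // bind the cuDNN/cuBLAS handles to GPU 0
      const size_t batch_size = 500;          // illustrative; matches the example above
      size_t bstart = 0;
      startMemTracking();                     // reset the per-batch allocation tracking lists
      while (true) {
        Tensor *batch = readDataSet("../model_params/image_processing_5k", bstart, batch_size);
        if (batch == nullptr)                 // end of dataset
          break;
        // ... run tensor ops on `batch` here ...
        bstart += batch_size;
        freeBatchMemory();                    // free every output tensor allocated for this batch
      }
      llvm_hpvm_clearRuntimeController();
      return 0;
    }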
-void** tensorSplit(void* tensor_ptr, int num_splits, int split_dim){ - - INFO("*** TensorSplit \n"); - profileEvent("tensorSplit"); - - Tensor* tensor = (Tensor*) tensor_ptr; - - deviceToHostCopy(tensor); // Splitting done on the host - - Tensor** splits = (Tensor**) malloc(sizeof(Tensor*) * num_splits); - size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensor->dims.num_dims); - for(unsigned int i = 0; i < tensor->dims.num_dims; i++){ - dim_sizes[i] = tensor->dims.dim_sizes[i]; - } - - - dim_sizes[split_dim] = tensor->dims.dim_sizes[split_dim] / num_splits; - if(dim_sizes[split_dim] < 1) - ERROR("Split Dimension < 1 after splitting"); - - size_t copy_size = getTypeSize(tensor->data_type); - for(unsigned int i = split_dim; i < tensor->dims.num_dims; i++){ - copy_size = copy_size * dim_sizes[i]; - } - - for(unsigned int i = 0; i < num_splits; i++){ - // FIXIT: Don't be specific to 4D tensors - // NOTE: Using same data format (NHWC/NCHW) for the split tensors - INFO("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", - dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); - - Tensor* split = (Tensor*) create4DTensor(tensor->data_type, tensor->data_format, - dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); - - size_t copy_start = i * copy_size; - size_t copy_stride = num_splits * copy_size; - INFO("copy_size = %d, copy_start = %d, copy_stride = %d, tensor->size_in_bytes = %d \n", - copy_size, copy_start, copy_stride, tensor->size_in_bytes); - - int index = 0; - while(copy_start + copy_size <= tensor->size_in_bytes){ - memcpy(((char*) split->host_data + (index * copy_size)), - ((char*)tensor->host_data + copy_start), - copy_size); - copy_start += copy_stride; - index++; - } - - splits[i] = split; - } - - profileEvent("tensorSplit_end", true); - - return (void**) splits; -} - - -void* tensorConcat(void** tensors_ptr, int num_splits, int split_dim){ - - INFO("*** TensorConcat \n"); - profileEvent("tensorConcat"); - - Tensor** tensors = (Tensor**) tensors_ptr; - - for(int i = 0; i < num_splits; i++){ - deviceToHostCopy(tensors[i]); // Concatenation done on the host - } - - // The no of dimensions of concatenated tensor are the same - size_t* dim_sizes = (size_t*) malloc(sizeof(size_t) * tensors[0]->dims.num_dims); - for(unsigned int i = 0; i < tensors[0]->dims.num_dims; i++){ - dim_sizes[i] = tensors[0]->dims.dim_sizes[i]; - } - - size_t copy_size = getTypeSize(tensors[0]->data_type); - for(unsigned int i = split_dim; i < tensors[0]->dims.num_dims; i++){ - copy_size = copy_size * dim_sizes[i]; - } - - dim_sizes[split_dim] = dim_sizes[split_dim] * num_splits; - if(dim_sizes[split_dim] < 1) - ERROR("Split Dimension < 1 after concat"); - - Tensor* output = (Tensor*) create4DTensor(tensors[0]->data_type, tensors[0]->data_format, - dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); - - INFO("dim_sizes[0] = %d, dim_sizes[1] = %d, dim_sizes[2] = %d, dim_sizes[3] = %d \n", - dim_sizes[0], dim_sizes[1], dim_sizes[2], dim_sizes[3]); - - - int num_copies = 1; - for(unsigned int i = 0; i < split_dim; i++){ - num_copies = num_copies * dim_sizes[i]; - } - - size_t copy_stride = num_splits * copy_size; - INFO("copy_size = %d, num_copies = %d, copy_stride = %d, output->size_in_bytes = %d \n", - copy_size, num_copies, copy_stride, output->size_in_bytes); - - for(unsigned int i = 0; i < num_copies; i++){ - // FIXIT: Don't be specific to 4D tensors - size_t copy_start = i * copy_stride; - - for(int j = 0; j < num_splits; j++){ - struct Tensor* split = tensors[j]; 
- memcpy(((char*) output->host_data + copy_start + (j * copy_size)), - ((char*) split->host_data + (i * copy_size)), - copy_size); - } - } - - profileEvent("tensorConcat_end", true); - - return output; -} - - - -void* tensorLRN(void* input_ptr, unsigned int LRN_window, - double LRN_alpha, double LRN_beta, double LRN_k){ - - INFO("*** TensorLRN \n"); - profileEvent("tensorLRN"); - - Tensor* input = (Tensor*) input_ptr; - - hostToDeviceCopy(input); - - float alpha = 1.0f, beta = 0.0f; - cudnnLRNDescriptor_t LRNDesc; - checkCUDNN(cudnnCreateLRNDescriptor(&LRNDesc)); - - INFO("window = %d, LRN_alpha = %f, LRN_beta = %f, LRN_k = %f \n", - LRN_window, LRN_alpha, LRN_beta, LRN_k); - - - checkCUDNN(cudnnSetLRNDescriptor(LRNDesc, LRN_window, LRN_alpha, LRN_beta, LRN_k)); - - size_t* dim_sizes = input->dims.dim_sizes; - Tensor* output = (Tensor*) create4DTensor((cudnnDataType_t) float_type, - CUDNN_TENSOR_NCHW, dim_sizes[0], dim_sizes[1], - dim_sizes[2], dim_sizes[3]); - // NOTE: Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - // NOTE: Necessary to insert the above call for every output tensor - - printTensorDescInfo(input); - printTensorDescInfo(output); - - checkCUDNN(cudnnLRNCrossChannelForward(cudnnHandle, LRNDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1, - &alpha, input->tensor_desc, input->gpu_data, - &beta, output->tensor_desc, output->gpu_data)); - - profileEvent("tensorLRN_end", true); - - return output; -} - - -void printTensorDims2(void* tensor_ptr){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - - printf("Num_elems = %lu \n", tensor->num_elems); - for (int i = 0; i < tensor->dims.num_dims; i++){ - printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]); - } -} - - - - -// FIXIT: tensorAdd currently only works for 4D tensors -void* tensorAdd(void* x_ptr, void* bias_ptr){ - - Tensor* x = (Tensor*) x_ptr; - Tensor* bias = (Tensor*) bias_ptr; - - INFO("*** TensorAdd \n"); - profileEvent("Add"); - - float alpha = 1.0f; - //float beta = 0.0f; - hostToDeviceCopy(x); - hostToDeviceCopy(bias); - - convertToFP32(x); - convertToFP32(bias); - - - INFO("x->num_elems = %d \n", x->num_elems); - INFO("bias->num_elems = %d \n", bias->num_elems); - - if(cudnnHandle == NULL){ - ERROR("cudnnHandle NOT initialized!! 
\n"); - } - - // FIXIT: routine fails for 3D tensors - checkCUDNN(cudnnAddTensor(cudnnHandle, &alpha, bias->tensor_desc, - bias->gpu_data, &alpha, x->tensor_desc, x->gpu_data)); - - profileEvent("Add_end", true); - - #ifdef ERROR_INJECTION_ENABLED - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - - // Forcing 0 error for (Resnet-like) equal dimension adds (Testing-only) - - //-- if (bias->dims.dim_sizes[0] > 1) - //-- op_acc = 0; - // Skip errorInjection if explicitly requested - //-- if (skip_tensors.find(op_counter) != skip_tensors.end()){ - //-- op_acc = 0; - // } - - void* error_norms = tensorAddError(x, op_acc); - add_norms(error_norms, "tensorAdd", op_acc); - add_bias_overheads(x, op_acc); - op_counter++; - - #endif - - - return x; -} - - -// FIXIT: Generalize all of the routines for types {half, float, double} -void* tensorConvolution(void* input_ptr, void* filter_ptr, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int conv_groups){ - - INFO("*** TensorConvolution \n"); - profileEvent("Conv"); - - Tensor* input = (Tensor*) input_ptr; - Tensor* filter = (Tensor*) filter_ptr; - - cudnnConvolutionDescriptor_t convDesc; - cudnnConvolutionFwdAlgo_t convAlgo; - cudnnConvolutionMode_t mode; - if(conv_mode == 0) - mode = CUDNN_CONVOLUTION; - else if(conv_mode == 1) - mode = CUDNN_CROSS_CORRELATION; - - mode = CUDNN_CROSS_CORRELATION; - // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; - - // TODO: Support other cases; - hostToDeviceCopy(input); - hostToDeviceCopy(filter); - - convertToFP32(input); - convertToFP32(filter); - - - INFO("vertical_stride = %lu, horizontal_stride = %lu \n", vertical_stride, horizontal_stride); - - checkCUDNN(cudnnCreateConvolutionDescriptor(&convDesc)); - - //FIXME: Current hack to preserve backward compatibilty - if(conv_groups == 0){ - conv_groups = 1; - } - - - - cudnnDataType_t computeType = CUDNN_DATA_FLOAT; - // FIXIT: Think if upscaling values need to be configurable? - // IMP-FIXIT: Either make mode configurable OR see if CUDNN_CONVOLUTION MODE should be used? 
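// NOTE (clarifying comment, not in the original file): in the cuDNN v6+ API, the two
// arguments that follow the strides in cudnnSetConvolution2dDescriptor are the filter
// dilations (dilation_h, dilation_w); "upscaling values" is the older cuDNN terminology,
// and passing 1, 1 simply selects an undilated filter.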
- checkCUDNN(cudnnSetConvolution2dDescriptor(convDesc, - vertical_pad, horizontal_pad, // conv padding - vertical_stride, horizontal_stride, // conv strides - 1, 1, // upscaling values - mode , // mode is configurable - computeType)); // defines compute precision - - // NOTE: Adding support for grouped convolution - checkCUDNN(cudnnSetConvolutionGroupCount(convDesc, conv_groups)); - - int n, c, h, w; // output dimensions - // Find dimension of convolution output - - if(input->tensor_desc == NULL || filter->filter_desc == NULL) - ERROR("Input or Filter descriptor is NULL"); - - checkCUDNN(cudnnGetConvolution2dForwardOutputDim(convDesc, - input->tensor_desc, - filter->filter_desc, - &n, &c, &h, &w)); - - - DEBUG("**Output Tensor Dims, n = %d, c = %d, h = %d, w = %d \n", n, c, h, w); - - Tensor* output; - if(input->data_format == CUDNN_TENSOR_NCHW) - output = (Tensor*) create4DTensor((cudnnDataType_t) float_type, - CUDNN_TENSOR_NCHW, n, c, h, w); - else if(input->data_format == CUDNN_TENSOR_NHWC){ - DEBUG("* NHWC Format \n"); - output = (Tensor*) create4DTensor((cudnnDataType_t) float_type, - CUDNN_TENSOR_NHWC, n, h, w, c); - } - else - ERROR("Unsupported Tensor Type"); - - // NOTE: Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - // NOTE: Necessary to insert the above call for every output tensor - - DEBUG("tensor->data_type = %d, tensor->data_format = %d, N = %d, C = %d, H = %d, W = %d \n", - output->data_type, output->data_format, output->dims.dim_sizes[0], - output->dims.dim_sizes[1], - output->dims.dim_sizes[2], output->dims.dim_sizes[3]); - - if(convDesc == NULL || input->tensor_desc == NULL || - filter->filter_desc == NULL || output->tensor_desc == NULL) - ERROR("NULL descriptor! \n"); - - - // Debugging info prints - printTensorDescInfo(input); - printTensorDescInfo(filter); - printTensorDescInfo(output); - - // NOTE-FIXIT: function failing for NHWC formats - perhaps some CUDNN support is lacking - checkCUDNN(cudnnGetConvolutionForwardAlgorithm(cudnnHandle, - input->tensor_desc, - filter->filter_desc, - convDesc, - output->tensor_desc, - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, - //CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, - 0, - &convAlgo)); - - - DEBUG("ConvAlgo = %d, FFT = %d, GEMM = %d, WINOGRAD = %d \n", convAlgo, - CUDNN_CONVOLUTION_FWD_ALGO_FFT, CUDNN_CONVOLUTION_FWD_ALGO_GEMM, - CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD); - - - // FIXIT: Algo shouldn't be hardcoded - //convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - convAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; - - size_t workspace_size; - checkCUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle, - input->tensor_desc, - filter->filter_desc, - convDesc, - output->tensor_desc, - convAlgo, - &workspace_size)); - - // Allocating memory for the convolution workspace - void* workspace; - checkCudaErrors(cudaMalloc(&workspace, workspace_size)); - DEBUG("workspace size = %d \n", workspace_size); - - - checkCUDNN(cudnnConvolutionForward(cudnnHandle, &alpha, input->tensor_desc, - input->gpu_data, filter->filter_desc, filter->gpu_data, - convDesc, convAlgo, workspace, workspace_size, - &beta, output->tensor_desc, output->gpu_data)); - - profileEvent("Conv_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - - // Ignore Error Injection for Depthwise Convolution - /*if (conv_groups > 1){ - op_acc = 0; - } - */ - - - void* error_norms = tensorAddError(output, 
op_acc); - add_norms(error_norms, "tensorConv", op_acc); - add_conv_overheads(input, filter, vertical_stride, horizontal_stride, op_acc); - - op_counter++; - - #endif - - - return output; -} - - - -// NOTE: Supports Max and Avg Pooling -void* tensorPooling(void* input_ptr, - int poolFunction, - int window_height, int window_width, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride){ - - INFO("*** TensorPooling \n"); - profileEvent("Pool"); - - Tensor* input = (Tensor*) input_ptr; - - cudnnPoolingDescriptor_t poolDesc; - // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; - - hostToDeviceCopy(input); - - convertToFP32(input); - - - checkCUDNN(cudnnCreatePoolingDescriptor(&poolDesc)); - - int n = input->dims.dim_sizes[0]; - int c = input->dims.dim_sizes[1]; - int h = (input->dims.dim_sizes[2] + (2 * vertical_pad) - window_height) / vertical_stride; - h = h + 1; - int w = (input->dims.dim_sizes[3] + (2 * horizontal_pad) - window_width) / horizontal_stride; - w = w + 1; - - DEBUG("n = %d, c = %d, h = %d, w = %d \n", n, c, h, w); - - // FIXIT: Don't be specific to floats - Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, n, c, h, w); - // Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - - // FIXIT: Fix being specific to CUDNN_DATA_FLOAT and NCHW format - // FIXIT: Is this setTensor even needed? - checkCUDNN(cudnnSetTensor4dDescriptor(output->tensor_desc, - CUDNN_TENSOR_NCHW, - CUDNN_DATA_FLOAT, - n, c, - h, w)); - - - cudnnPoolingMode_t pool_mode; - if(poolFunction == 0) - pool_mode = CUDNN_POOLING_MAX; - else if(poolFunction == 1) - pool_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; - - - - // FIXIT: Make the pool function (max, min, avg) configurable - checkCUDNN(cudnnSetPooling2dDescriptor(poolDesc, - //CUDNN_POOLING_MAX, - pool_mode, - CUDNN_PROPAGATE_NAN, - window_height, window_width, - vertical_pad, horizontal_pad, - vertical_stride, horizontal_stride)); - - checkCUDNN(cudnnPoolingForward(cudnnHandle, poolDesc, &alpha, input->tensor_desc, - input->gpu_data, &beta, output->tensor_desc, output->gpu_data)); - - profileEvent("Pool_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - void* error_norms = tensorAddError(output, op_acc); - add_norms(error_norms, "tensorPooling", op_acc); - add_pool_overheads(input, window_height, vertical_stride, op_acc); - - op_counter++; - - #endif - - - return output; -} - - - - -void* tensorGemmCPU(void* lhs_ptr, void* rhs_ptr){ - - INFO("*** TensorGemmCPU \n"); - - Tensor* lhs = (Tensor*) lhs_ptr; - Tensor* rhs = (Tensor*) rhs_ptr; - - // The operation is done on the CPU - deviceToHostCopy(lhs); - deviceToHostCopy(rhs); - - if(lhs->data_type != CUDNN_DATA_FLOAT){ - ERROR("Currently only Floating point is supported "); - } - - profileEvent("tensorGemmCPU"); - - INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); - INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); - - // FIXIT: Need to be more aware of the implications of alpha and beta - //float alpha = 1.0f; - // float beta = 0.0f; - // 'm' holds the batch dimension - assuming NCHW format Tensors - int m = lhs->dims.dim_sizes[0]; - // The rhs must be a 2D tensor - int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons - int k = 1; - // Flattening the dimensions after the batch dimension - // NOTE: 
Allowing any number of dimensions > 2 for lhs - for (int j = 1 ; j < lhs->dims.num_dims; j++){ - k = k * lhs->dims.dim_sizes[j]; // input neurons - } - - int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; - // Dimension-note: Check if k is same across the two tensors - INFO("m = %d, n = %d, k = %d \n", m, n, k); - if(rhs_k != k){ - ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); - } - - // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines - Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1); - // Changing output tensor placement from host to device - changeTensorPlacement(output, HOST); - - float* lhs_arr = (float*) lhs->host_data; - float* rhs_arr = (float*) rhs->host_data; - float* output_arr = (float*) output->host_data; - - for(int i = 0; i < m; i++){ - for(int j = 0; j < n; j++){ - float sum = 0.0; - for(int l = 0; l < k; l++){ - float mul = lhs_arr[i*k+l] * rhs_arr[l*n+j]; - sum = sum + mul; - } - output_arr[i*n+j] = sum; - } - } - - - profileEvent("tensorGemmCPU_end", true); - - return output; -} - - - -// Reference: https://gist.github.com/peterwittek/6303527 -void* tensorGemmGPU(void* lhs_ptr, void* rhs_ptr ){ //, void* result_tensor){ - - INFO("*** TensorGemmGPU \n"); - profileEvent("Mul"); - - Tensor* lhs = (Tensor*) lhs_ptr; - Tensor* rhs = (Tensor*) rhs_ptr; - - - INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); - INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); - - // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; - // 'm' holds the batch dimension - assuming NCHW format Tensors - int m = lhs->dims.dim_sizes[0]; - // The rhs last dimension must contain the neurons - int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons - int k = 1; - - // Flattening the dimensions after the batch dimension - // NOTE: Allowing any number of dimensions > 2 for lhs - for (int j = 1 ; j < lhs->dims.num_dims; j++){ - k = k * lhs->dims.dim_sizes[j]; // input neurons - } - - int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; - // Dimension-note: Check if k is same across the two tensors - INFO("m = %d, n = %d, k = %d \n", m, n, k); - if(rhs_k != k){ - ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); - } - - Tensor* output = NULL; - DEBUG("Creating new TENSOR * \n"); - output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1); - - - /* else{ - DEBUG("Reusing TENSOR *\n"); - // FIXIT: Add Assertion to check for null pointer and dimension matching - output = (Tensor*) result_tensor; - // FIXIT: output value is trashing - Is this deallocated? 
- INFO("output->num_elems = %lu \n", output->data_type); - } - */ - - DEBUG("Changing placement *\n"); - // Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - - DEBUG("Changed Placement * \n\n"); - - hostToDeviceCopy(lhs); - hostToDeviceCopy(rhs); - - convertToFP32(lhs); - convertToFP32(rhs); - - - DEBUG("CuBlasSgemm *\n"); - - // INFO: cuBlas uses column-major format - // INFO: The leading dimension is just the FIRST Dimension - // IMP: output is N * M in column-major format, M*N in row-major - what cuDNN expects - checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, - n, m, k, - &alpha, - (float*) rhs->gpu_data, n, - (float*) lhs->gpu_data, k, - &beta, - (float*) output->gpu_data, n)); - - - profileEvent("Mul_end", true); - - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - - void* error_norms = tensorAddError(output, op_acc); - add_norms(error_norms, "tensorGemm", op_acc); - add_gemm_overheads(lhs_ptr, rhs_ptr, op_acc); - - op_counter++; - - #endif - - - return output; -} - - - - - - - -void* tensorGemm(void* lhs_ptr, void* rhs_ptr){ - - INFO("*** TensorGemm \n"); - profileEvent("tensorGemm"); - - Tensor* lhs = (Tensor*) lhs_ptr; - Tensor* rhs = (Tensor*) rhs_ptr; - - INFO("rhs->dims.num_dims = %d \n", rhs->dims.num_dims); - INFO("lhs->dims.num_dims = %d \n", lhs->dims.num_dims); - - // FIXIT: Need to be more aware of the implications of alpha and beta - float alpha = 1.0f, beta = 0.0f; - // 'm' holds the batch dimension - assuming NCHW format Tensors - int m = lhs->dims.dim_sizes[0]; - // The rhs last dimension must contain the neurons - int n = rhs->dims.dim_sizes[rhs->dims.num_dims-1]; // output neurons - int k = 1; - // Flattening the dimensions after the batch dimension - // NOTE: Allowing any number of dimensions > 2 for lhs - for (int j = 1 ; j < lhs->dims.num_dims; j++){ - k = k * lhs->dims.dim_sizes[j]; // input neurons - } - - int rhs_k = rhs->dims.dim_sizes[rhs->dims.num_dims-2]; - // Dimension-note: Check if k is same across the two tensors - INFO("m = %d, n = %d, k = %d \n", m, n, k); - if(rhs_k != k){ - ERROR("rhs=%d and lhs=%d columns/rows don't match", rhs_k, k); - } - - // NOTE: Creating a 4D tensor to be compatible with later called cuDNN routines - Tensor* output = (Tensor*) create4DTensor(CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m, n, 1, 1); - // Changing output tensor placement from host to device - changeTensorPlacement(output, DEVICE); - - hostToDeviceCopy(lhs); - hostToDeviceCopy(rhs); - - // NOTE: cuBlas uses column-major format - // NOTE: The leading dimension is the FIRST Dimension - // NOTE: The output is N * M in column-major format, M*N in row-major - what cuDNN expects - checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, - n, m, k, - &alpha, - (float*) rhs->gpu_data, k, - (float*) lhs->gpu_data, k, - &beta, - (float*) output->gpu_data, n)); - - profileEvent("tensorGemm_end", true); - - return output; -} - - - - -// FIXIT: Add dimension check assertions throughout the code -void* tensorGemmBias(void* input_ptr, void* bias_ptr){ - - INFO("*** TensorGemmBias \n"); - profileEvent("tensorGemmBias"); - - Tensor* input = (Tensor*) input_ptr; - Tensor* bias = (Tensor*) bias_ptr; - - // NOTE: beta is set to 1 to append to input - // C = A * B + Beta * C - float alpha = 1.0f, beta = 1.0f; - // 'm' holds the batch dimension - assuming NCHW format Tensors - int m = input->dims.dim_sizes[0]; 
- // The bias must be a 2D tensor - int n = bias->dims.dim_sizes[bias->dims.num_dims - 1]; // output neurons - - INFO("m = %d, n = %d \n", m, n); - - hostToDeviceCopy(input); - hostToDeviceCopy(bias); - - struct Tensor* onevec = (Tensor*) create2DTensor(CUDNN_DATA_FLOAT, m, 1); - fillOnes(onevec); - hostToDeviceCopy(onevec); - - // NOTE: cuBlas uses column-major format - // NOTE: The leading dimension is just the FIRST Dimension - checkCudaErrors(cublasSgemm(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, - n, m, 1, - &alpha, - (float*) bias->gpu_data, n, - (float*) onevec->gpu_data, 1, - &beta, - (float*) input->gpu_data, n)); - - profileEvent("tensorGemmBias_end", true); - - return input; -} - - -void* tensorRelu(void* input_ptr){ - - INFO("*** TensorRelu \n"); - profileEvent("Relu"); - - Tensor* input = (Tensor*) input_ptr; - - cudnnActivationDescriptor_t reluDesc; - float alpha = 1.0f, beta = 0.0f; - - hostToDeviceCopy(input); - - convertToFP32(input); - - - checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc)); - - checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_RELU, - CUDNN_PROPAGATE_NAN, 0.0)); - - checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha, - input->tensor_desc, input->gpu_data, &beta, - input->tensor_desc, input->gpu_data)); - - profileEvent("Relu_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - - void* error_norms = tensorAddError(input, op_acc); - add_norms(error_norms, "tensorRelu", op_acc); - add_relu_overheads(input, op_acc); - op_counter++; - #endif - - - return input; -} - - -// Think: Should Softmax be broken into multiple IR operations? -void* tensorSoftmax(void* input_ptr){ - - INFO("*** TensorSoftmax \n"); - profileEvent("Softmax"); - - Tensor* input = (Tensor*) input_ptr; - float alpha = 1.0f, beta = 0.0f; - - hostToDeviceCopy(input); - convertToFP32(input); - - - // IMP: CUDNN_SOFTMAX_ACCURATE can be replaced with a less acurate CUDNN_SOFTMAX_FAST - checkCUDNN(cudnnSoftmaxForward(cudnnHandle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, - &alpha, input->tensor_desc, input->gpu_data, &beta, - input->tensor_desc, input->gpu_data)); - - deviceToHostCopy(input); - profileEvent("Softmax_end", true); - - return input; -} - - - -__global__ void clipValues(float* A, float min, float max, int n){ - - int id = blockIdx.x * blockDim.x + threadIdx.x; - - if(id < n){ - A[id] = fmaxf(min, A[id]); - A[id] = fminf(max, A[id]); - } -} - - - -void* tensorRelu2(void* input_ptr, float min, float max){ - - INFO("*** TensorClippedRelu *** \n"); - profileEvent("Relu"); - - cudnnActivationDescriptor_t reluDesc; - float alpha = 1.0f, beta = 0.0f; - - Tensor* input = (Tensor*) input_ptr; - - hostToDeviceCopy(input); - - convertToFP32(input); - - - checkCUDNN(cudnnCreateActivationDescriptor(&reluDesc)); - - checkCUDNN(cudnnSetActivationDescriptor(reluDesc, CUDNN_ACTIVATION_CLIPPED_RELU, - CUDNN_PROPAGATE_NAN, max)); - - checkCUDNN(cudnnActivationForward(cudnnHandle, reluDesc, &alpha, - input->tensor_desc, input->gpu_data, &beta, - input->tensor_desc, input->gpu_data)); - - - - profileEvent("Relu_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - void* error_norms = tensorAddError(input, op_acc); - add_norms(error_norms, "tensorClippedRelu", op_acc); - add_relu_overheads(input, op_acc); - op_counter++; - #endif - - - 
return input; -} - - -void* tensorTanh(void* input_ptr){ - - INFO("*** TensorTanh \n"); - profileEvent("Tanh"); - - Tensor* input = (Tensor*) input_ptr; - - cudnnActivationDescriptor_t tanhDesc; - float alpha = 1.0f, beta = 0.0f; - - hostToDeviceCopy(input); - - convertToFP32(input); - - - checkCUDNN(cudnnCreateActivationDescriptor(&tanhDesc)); - - checkCUDNN(cudnnSetActivationDescriptor(tanhDesc, CUDNN_ACTIVATION_TANH, - CUDNN_PROPAGATE_NAN, 0.0)); - - checkCUDNN(cudnnActivationForward(cudnnHandle, tanhDesc, &alpha, - input->tensor_desc, input->gpu_data, &beta, - input->tensor_desc, input->gpu_data)); - - profileEvent("Tanh_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - void* error_norms = tensorAddError(input, op_acc); - add_norms(error_norms, "tensorTanh", op_acc); - add_relu_overheads(input, op_acc); - op_counter++; - #endif - - - return input; -} - - - - -void* tensorBatchNorm(void* input_ptr, void* gamma_ptr, void* beta_ptr, - void* mean_ptr, void* variance_ptr, double epsilon){ - - INFO("*** TensorBatchNorm \n"); - profileEvent("BatchNorm"); - - Tensor* input = (Tensor*) input_ptr; - Tensor* gamma = (Tensor*) gamma_ptr; - Tensor* beta = (Tensor*) beta_ptr; - Tensor* mean = (Tensor*) mean_ptr; - Tensor* variance = (Tensor*) variance_ptr; - - if (input == NULL || gamma == NULL || beta == NULL || mean == NULL || variance == NULL){ - ERROR("NULL Input Tensor"); - } - - float alpha_val = 1.0f, beta_val = 0.0f; - hostToDeviceCopy(input); - hostToDeviceCopy(gamma); - hostToDeviceCopy(beta); - hostToDeviceCopy(mean); - hostToDeviceCopy(variance); - - convertToFP32(input); - - - - checkCUDNN(cudnnBatchNormalizationForwardInference(cudnnHandle, CUDNN_BATCHNORM_SPATIAL, - &alpha_val, &beta_val, - input->tensor_desc, input->gpu_data, - input->tensor_desc, input->gpu_data, - gamma->tensor_desc, gamma->gpu_data, - beta->gpu_data, mean->gpu_data, - variance->gpu_data, - epsilon)); - - profileEvent("BatchNorm_end", true); - - - #ifdef ERROR_INJECTION_ENABLED - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - int op_acc = op_accuracies[op_counter]; - //op_acc = 0; - void* error_norms = tensorAddError(input, op_acc); - add_norms(error_norms, "tensorBatchNorm", op_acc); - add_relu_overheads(input, op_acc); - op_counter++; - #endif - - - return input; -} - - - - -/************* GPU Layer API *************/ - -void* ConvLayer_GPU(void* input, - void* filter, - void* bias, - int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, - int pool_id, int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu' - - void* conv_out = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - void* conv_add; - if(bias != NULL){ - conv_add = tensorAdd(conv_out, bias); - } - else{ - conv_add = conv_out; - } - - void* activation_out; - switch(activation_id){ - case -1: - activation_out = conv_add; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(conv_add); - break; - case 1: - activation_out = tensorRelu(conv_add); - break; - case 2: - activation_out = tensorRelu2(conv_add, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - - void* pool_out = activation_out; - // NOTE: Skip pooling on negative pool sizes - if(pool_size > 0){ - 
//FIXME: Currently only using MaxPooling - pool_out = tensorPooling(activation_out, 0, pool_size, pool_size, 0, 0, pool_size, pool_size); - } - else{ - pool_out = activation_out; - } - - return pool_out; -} - - -void* FCLayer_GPU(void* input, - void* weights, - void* bias, - int activation_id, - float out_min, float out_max){ // NOTE: min_val, max_val apply to 'ClippedRelu' - - void* gemm_out = tensorGemmGPU(input, weights); - - void* gemmbias_out; - if(bias != NULL){ - gemmbias_out = tensorAdd(gemm_out, bias); - } - else{ - gemmbias_out = gemm_out; - } - - void* activation_out; - switch(activation_id){ - - case -1: - activation_out = gemmbias_out; - INFO("No Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(gemmbias_out); - break; - case 1: - activation_out = tensorRelu(gemmbias_out); - break; - case 2: - activation_out = tensorRelu2(gemmbias_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - return activation_out; -} - - -/*********** PROMISE API **************/ - -/* -void* ConvLayer_PROMISE(void* input, float i_min, float i_max, - void* filter, float w_min, float w_max, - void* bias, float b_min, float b_max, - int conv_pad_h, int conv_pad_w, int conv_stride_h, int conv_stride_w, - int pool_id, int pool_size, - int activation_id, // Relu, Tanh, ClipRelu - float out_min, float out_max, int swing){ - - - #ifdef PROMISE_TUNER_ENABLED - - // NOTE: Skip reading file-based error levels for ApproxHPVM wrapper runtime - if(!approxhpvm_runtime_mode){ - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - swing = op_accuracies[op_counter]; - op_counter++; - } - - #endif - - - if (swing < 0 || swing > 20){ - ERROR("Incorrect swing value"); - } - - - - if(swing < 8){ - input = quantizeTensorPromise(input, i_min, i_max); - filter = quantizeTensorPromise(filter, w_min, w_max); - if(bias != NULL) - bias = quantizeTensorPromise(bias, b_min, b_max); - // aRead error - - input = addPromiseError(input, swing); - } - - - void* conv_out; - if(swing == 8 || (swing >= 12 && swing <= 15) ){ - //conv_out = tensorConvPerf(input, filter, conv_pad_h, conv_pad_w, - // conv_stride_h, conv_stride_w, 1, 1, 1, 0); - - int rows = 2; - switch(swing){ - - case 12: rows = 5; break; - case 13: rows = 4; break; - case 14: rows = 3; break; - case 15: rows = 2; break; - - default: rows = 2; break; - } - - conv_out = tensorConvPerf2(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, rows, 0); - - /*void* gold = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - - Norm_t* norms = calculateNormsTreeReduction((struct Tensor*) conv_out, (struct Tensor*) gold); - - DEBUG("\n-------- l2_norm = %f \n", norms->l2_norm); - */ - - - - /* ----- - } - else if(swing == 9 || (swing >= 16 && swing <= 19) ){ - //conv_out = tensorConvPerf(input, filter, conv_pad_h, conv_pad_w, - // conv_stride_h, conv_stride_w, 1, 1, 0, 1); - - - int cols = 2; - switch(swing){ - - case 16: cols = 5; break; - case 17: cols = 4; break; - case 18: cols = 3; break; - case 19: cols = 2; break; - - default: cols = 2; break; - } - - - conv_out = tensorConvPerf2(input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, 1, 1, 0, cols); - - - /*void* gold = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - - Norm_t* norms = calculateNormsTreeReduction((struct Tensor*)conv_out, (struct Tensor*) gold); - - 
DEBUG("\n-------- l2_norm = %f \n", norms->l2_norm); - */ - - /*------ - } - else if(swing == 10){ - conv_out = tensorHalfConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - } - else{ - conv_out = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - } - - void* conv_add; - if(bias != NULL){ - if(swing >= 8){ - conv_add = tensorHalfAdd(conv_out, bias); - } - else{ - conv_add = tensorAdd(conv_out, bias); - } - } - else{ - conv_add = conv_out; - } - - void* pool_out; - // NOTE: Skip pooling on negative pool sizes - if(pool_size > 0){ - //FIXME: Currently only using MaxPooling - pool_out = tensorHalfPooling(conv_add, 0, pool_size, pool_size, 0, 0, pool_size, pool_size); - } - else{ - pool_out = conv_add; - } - - void* activation_out; - switch(activation_id){ - case -1: - activation_out = pool_out; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorHalfTanh(pool_out); - break; - case 1: - activation_out = tensorHalfRelu(pool_out); - break; - case 2: - activation_out = tensorHalfRelu2(pool_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - - if(swing < 8 && activation_id != -1){ - activation_out = quantizeTensorPromise(activation_out, out_min, out_max); - } - - return activation_out; -} - - -void* FCLayer_PROMISE(void* input, float i_min, float i_max, - void* weights, float w_min, float w_max, - void* bias, float b_min, float b_max, - int activation_id, - float out_min, float out_max, int swing){ //NOTE: min_val, max_val apply to 'ClippedRelu' - - - - #ifdef PROMISE_TUNER_ENABLED - - // NOTE: Skip reading file-based error levels for ApproxHPVM wrapper runtime - if(!approxhpvm_runtime_mode){ - - if(op_counter >= total_ops){ - ERROR("No accuracy flag found \n"); - } - - swing = op_accuracies[op_counter]; - op_counter++; - } - - #endif - - - if (swing < 0 || swing > 20){ - ERROR("Incorrect swing value"); - } - - if(swing < 8){ - input = quantizeTensorPromise(input, i_min, i_max); - weights = quantizeTensorPromise(weights, w_min, w_max); - if(bias != NULL) - bias = quantizeTensorPromise(bias, b_min, b_max); - - // NOTE: Modelling aRead error in PROMISE - input = addPromiseError(input, swing); - } - - - - void* gemm_out; - if(swing >= 8 && swing < 11){ - gemm_out = tensorHalfGemm(input, weights); - } - else{ - gemm_out = tensorGemmGPU(input, weights); - } - - - void* gemmbias_out; - if(bias != NULL){ - // Swing 8 corresponds to FP32 - if(swing >= 8 && swing < 20){ - gemmbias_out = tensorHalfAdd(gemm_out, bias); - } - else{ - gemmbias_out = tensorAdd(gemm_out, bias); - } - } - else{ - gemmbias_out = gemm_out; - } - - void* activation_out; - switch(activation_id){ - - case -1: - activation_out = gemmbias_out; - INFO("No Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(gemmbias_out); - break; - case 1: - activation_out = tensorRelu(gemmbias_out); - break; - case 2: - activation_out = tensorRelu2(gemmbias_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - - if(swing < 8 && activation_id != -1){ - activation_out = quantizeTensorPromise(activation_out, out_min, out_max); - } - - return activation_out; -} - -*****/ - - - -/**** Wrapper Runtime API ***/ - -void* wrapper_ConvLayer(const char* hpvm_node_id, - void* input, - void* filter, - void* bias, - int conv_pad_h, int conv_pad_w, - int conv_stride_h, int 
conv_stride_w, - int pool_id, int pool_size, - int activation_id, - // NOTE: out_min, out_max are only relevant for ClippedRelu - float out_min, float out_max){ - - NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); - - if (NodeConf->isPROMISENodeConfiguration()) { - DEBUG("PROMISE Configuration for ConvLayer\n"); - // Mapped to PROMISE - get a PROMISE node configuration - PROMISENodeConfiguration *PROMISEConf = (PROMISENodeConfiguration *)NodeConf; - std::vector<float> &QRanges = RC->getQuantizationRanges(hpvm_node_id); - - std::vector<std::pair<PROMISENodeConfiguration::APPROX, int> > &approxTuples = - PROMISEConf->getApproxChoices(); - - if (approxTuples.size() == 1) { - enum PROMISENodeConfiguration::APPROX approx = approxTuples[0].first; - int param = approxTuples[0].second; - if (approx == PROMISENodeConfiguration::APPROX::SWING_LEVEL) { - DEBUG("Approximation choice for ConvLayer: swing level %d\n", param); - - struct Tensor* input_tensor_cast = (struct Tensor*) input; - struct Tensor* filter_tensor_cast = (struct Tensor*) filter; - std::pair<double, double> pinfo = - RC->conv_profile(input_tensor_cast->dims.dim_sizes[0], //n - input_tensor_cast->dims.dim_sizes[1], //c - input_tensor_cast->dims.dim_sizes[2], //h - input_tensor_cast->dims.dim_sizes[3], //w - filter_tensor_cast->dims.dim_sizes[0], //c_out - filter_tensor_cast->dims.dim_sizes[1], //c_in - filter_tensor_cast->dims.dim_sizes[2], //k_h - filter_tensor_cast->dims.dim_sizes[3], //k_w - conv_stride_h, //s_h - conv_stride_w, //s_w - param, //voltage_swing - filter_tensor_cast->dims.dim_sizes[2] * - filter_tensor_cast->dims.dim_sizes[3] /*patch_factor: k_h*k_w*/); - RC->addToCurrentIterationComputeTime("ConvLayer_PROMISE", pinfo.first); - RC->addToCurrentIterationComputeEnergy("ConvLayer_PROMISE", pinfo.second); - void* t_out; - t_out = PROMISE_Conv(input, QRanges[0], QRanges[1], - filter, QRanges[2], QRanges[3], - bias, QRanges[4], QRanges[5], - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - pool_id, pool_size, - activation_id, - QRanges[6], QRanges[7], param); - - return t_out; - } else { - CUSTOM_ASSERT(false && "Unknown approximation type"); - ERROR("Unknown approximation type"); - abort(); - } - // TODO additional approx methods implemented here - - } else if (approxTuples.size() == 2) { - ERROR("Currently unsupported case"); - abort(); - } else { - ERROR("Unsupported case"); - abort(); - } - } - else - if (NodeConf->isGPUNodeConfiguration()) { - DEBUG("GPU Configuration for ConvLayer\n"); - // Mapped to GPU - get a GPU node configuration - GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf; - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Check for convolution as first operation - CUSTOM_ASSERT((ApproxChoices.size() >= 1) && - (ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::CONV) && - "Incorrect number/type of operations in provided Conv layer configuration"); - - void* conv_out = handleTensorConvApproximationTuples(ApproxChoices[0].second, - input, filter, conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w); - void* add_out; - if (bias != NULL) { - // Check for add as second operation - CUSTOM_ASSERT((ApproxChoices.size() >= 2) && - (ApproxChoices[1].first == GPUNodeConfiguration::TENSOR_OP::ADD) && - "Incorrect number/type of operations in provided Conv layer configuration"); - add_out = 
handleTensorAddApproximationTuples(ApproxChoices[1].second, - conv_out, bias); - } else { - add_out = conv_out; - } - - void* activation_out; - switch (activation_id) { - case -1: - { // No activation - INFO("No activation Function\n"); - activation_out = add_out; - } - break; - case 0: - { // TanH activation - CUSTOM_ASSERT((ApproxChoices.size() >= 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::TANH) && - "Incorrect number/type of operations in provided Conv layer configuration"); - activation_out = handleTensorTanhApproximationTuples(ApproxChoices[2].second, - add_out); - } - break; - case 1: - { // ReLU activation - CUSTOM_ASSERT((ApproxChoices.size() >= 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::RELU) && - "Incorrect number/type of operations in provided Conv layer configuration"); - activation_out = handleTensorReluApproximationTuples(ApproxChoices[2].second, - add_out); - } - break; - case 2: - { // Clipped ReLU activation - CUSTOM_ASSERT((ApproxChoices.size() >= 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) && - "Incorrect number/type of operations in provided Conv layer configuration"); - activation_out = - handleTensorClippedReluApproximationTuples(ApproxChoices[2].second, - add_out, out_min, out_max); - } - break; - default: - { - ERROR("Activation id %d NOT supported \n", activation_id); - } - break; - } - - void* pool_out; - - if (pool_size > 0) { - switch (pool_id) { - case 0: - { - // If we remove the asserts, we can have all cases handled by a single call - CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MAX) && - "Expected POOL_MAX in provided Conv layer configuration"); - pool_out = - handleTensorPoolingApproximationTuples(ApproxChoices.back().second, - activation_out, pool_id, - pool_size, pool_size, 0, 0, - pool_size, pool_size); - } - break; - case 1: - { - CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN) && - "Expected POOL_MEAN in provided Conv layer configuration"); - pool_out = - handleTensorPoolingApproximationTuples(ApproxChoices.back().second, - activation_out, pool_id, - pool_size, pool_size, 0, 0, - pool_size, pool_size); - } - break; - case 2: - { - CUSTOM_ASSERT((ApproxChoices.back().first == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) && - "Expected POOL_MIN in provided Conv layer configuration"); - pool_out = - handleTensorPoolingApproximationTuples(ApproxChoices.back().second, - activation_out, pool_id, - pool_size, pool_size, 0, 0, - pool_size, pool_size); - } - break; - default: - { - ERROR("Pool id %d NOT supported \n", pool_id); - } - break; - } - } else { - pool_out = activation_out; - } - return pool_out; - } - else { - ERROR("Unsupported Configuration"); - abort(); - } - - return NULL; -} - - -void* wrapper_FCLayer(const char* hpvm_node_id, - void* input, - void* weights, - void* bias, - int activation_id, - // NOTE: out_min and out_max are only relevant for ClippedRelu - float out_min, float out_max){ - - NodeConfiguration *NodeConf = RC->getNodeConfiguration(hpvm_node_id); - - if (NodeConf->isPROMISENodeConfiguration()) { - DEBUG("PROMISE Configuration for FCLayer\n"); - // Mapped to PROMISE - get a PROMISE node configuration - PROMISENodeConfiguration *PROMISEConf = (PROMISENodeConfiguration *)NodeConf; - std::vector<float> &QRanges = RC->getQuantizationRanges(hpvm_node_id); - - std::vector<std::pair<PROMISENodeConfiguration::APPROX, int> > &approxTuples = - PROMISEConf->getApproxChoices(); - 
- if (approxTuples.size() == 1) { - enum PROMISENodeConfiguration::APPROX approx = approxTuples[0].first; - int param = approxTuples[0].second; - if (approx == PROMISENodeConfiguration::APPROX::SWING_LEVEL) { - DEBUG("Approximation choice for FCLayer: swing level %d\n", param); - - struct Tensor* input_tensor_cast = (struct Tensor*) input; - struct Tensor* weights_tensor_cast = (struct Tensor*) weights; - CUSTOM_ASSERT((input_tensor_cast->dims.dim_sizes[1] * - input_tensor_cast->dims.dim_sizes[2] * - input_tensor_cast->dims.dim_sizes[3] == - weights_tensor_cast->dims.dim_sizes[2]) && - "Dimensions for matrix multiplication do not match."); - std::pair<double, double> pinfo = - RC->fc_profile(input_tensor_cast->dims.dim_sizes[0], //num_rows_a, - input_tensor_cast->dims.dim_sizes[1] * - input_tensor_cast->dims.dim_sizes[2] * - input_tensor_cast->dims.dim_sizes[3], //num_cols_a, - weights_tensor_cast->dims.dim_sizes[2], //num_rows_b, - weights_tensor_cast->dims.dim_sizes[3], //num_cols_b, - param, //voltage_swing, - 1 /*patch_factor*/); - RC->addToCurrentIterationComputeTime("FCLayer_PROMISE", pinfo.first); - RC->addToCurrentIterationComputeEnergy("FCLayer_PROMISE", pinfo.second); - void* t_out; - t_out = PROMISE_FC(input, QRanges[0], QRanges[1], - weights, QRanges[2], QRanges[3], - bias, QRanges[4], QRanges[5], - activation_id, - QRanges[6], QRanges[7], param); - return t_out; - } else { - CUSTOM_ASSERT(false && "Unknown approximation type"); - ERROR("Unknown approximation type"); - abort(); - } - // TODO additional approx methods implemented here - - } else if (approxTuples.size() == 2) { - ERROR("Currently unsupported case"); - abort(); - } else { - ERROR("Unsupported case"); - abort(); - } - } - else - if (NodeConf->isGPUNodeConfiguration()) { - DEBUG("GPU Configuration for FCLayer\n"); - // Mapped to GPU - get a GPU node configuration - GPUNodeConfiguration *GPUConf = (GPUNodeConfiguration *)NodeConf; - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a FC wrapper operation - CUSTOM_ASSERT((ApproxChoices.size() == 2 || ApproxChoices.size() == 3) && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::MUL && - ApproxChoices[1].first == GPUNodeConfiguration::TENSOR_OP::ADD && - "Invalid configuration generated for FC layer wrapper operation"); - - void* gemm_out = handleTensorMulApproximationTuples(ApproxChoices[0].second, - input, weights); - void* add_out = handleTensorAddApproximationTuples(ApproxChoices[1].second, - gemm_out, bias); - - void* activation_out; - switch (activation_id) { - case -1: - { // No activation - CUSTOM_ASSERT((ApproxChoices.size() == 2) && - "Incorrect number of operations in provided FC layer configuration"); - INFO("No activation Function\n"); - activation_out = add_out; - } - break; - case 0: - { // TanH activation - CUSTOM_ASSERT((ApproxChoices.size() == 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::TANH) && - "Incorrect number/type of operations in provided FC layer configuration"); - activation_out = handleTensorTanhApproximationTuples(ApproxChoices[1].second, - add_out); - } - break; - case 1: - { // ReLU activation - CUSTOM_ASSERT((ApproxChoices.size() == 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::RELU) && - "Incorrect number/type of operations in provided FC layer configuration"); - activation_out = 
handleTensorReluApproximationTuples(ApproxChoices[1].second, - add_out); - } - break; - case 2: - { // Clipped ReLU activation - CUSTOM_ASSERT((ApproxChoices.size() == 3) && - (ApproxChoices[2].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU) && - "Incorrect number/type of operations in provided FC layer configuration"); - activation_out = - handleTensorClippedReluApproximationTuples(ApproxChoices[1].second, - add_out, out_min, out_max); - } - break; - default: - { - ERROR("Activation id %d NOT supported \n", activation_id); - } - break; - } - return activation_out; - } - else { - ERROR("Unsupported Configuration"); - abort(); - } - - return NULL; -} - - - - -void* wrapper_tensorRelu(const char* hpvm_node_id, void* input_ptr){ -// return tensorRelu(input_ptr); - - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a relu operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::RELU && - "Invalid configuration generated for tensor relu wrapper operation"); - - return handleTensorReluApproximationTuples(ApproxChoices[0].second, - input_ptr); - -} - -void* wrapper_tensorClippedRelu(const char* hpvm_node_id, - void* input_ptr, - float out_min, float out_max){ - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a relu operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::CLIPPED_RELU && - "Invalid configuration generated for tensor clipped relu wrapper operation"); - - return handleTensorClippedReluApproximationTuples(ApproxChoices[0].second, - input_ptr, out_min, out_max); - -} - -void* wrapper_tensorTanh(const char* hpvm_node_id, void* input_ptr){ -// return tensorTanh(input_ptr); - - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a tanh operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::TANH && - "Invalid configuration generated for tensor tanh wrapper operation"); - - return handleTensorTanhApproximationTuples(ApproxChoices[0].second, - input_ptr); - -} - - -void* wrapper_tensorBatchNorm(const char* hpvm_node_id, - void* input_ptr, void* gamma_ptr, void* beta_ptr, - void* mean_ptr, void* variance_ptr, double epsilon){ -// return tensorBatchNorm(input_ptr, gamma_ptr, beta_ptr, mean_ptr, variance_ptr, epsilon); - - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - 
GPUConf->getApproxChoices(); - - // Approximation choices must be for a batchnorm operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::BATCHNORM && - "Invalid configuration generated for tensor batchnorm wrapper operation"); - - return handleTensorBatchNormApproximationTuples(ApproxChoices[0].second, - input_ptr, gamma_ptr, beta_ptr, - mean_ptr, variance_ptr, epsilon); - -} - - -void* wrapper_tensorAdd(const char* hpvm_node_id, void* input_ptr, void* bias_ptr){ -// return tensorAdd(input_ptr, bias_ptr); - - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for an add operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::ADD && - "Invalid configuration generated for tensor add wrapper operation"); - - return handleTensorAddApproximationTuples(ApproxChoices[0].second, - input_ptr, bias_ptr); - -} - - -void* wrapper_tensorPooling(const char* hpvm_node_id, - void* input_ptr, - int poolFunction, - int window_height, int window_width, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride){ - -// return tensorPooling(input_ptr, poolFunction, window_height, window_width, -// vertical_pad, horizontal_pad, vertical_stride, horizontal_stride); - - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a single operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - "Invalid configuration generated for tensor pool wrapper operation"); - enum GPUNodeConfiguration::TENSOR_OP top = ApproxChoices[0].first; - // Approximation choices must be for a pool operation - CUSTOM_ASSERT((top == GPUNodeConfiguration::TENSOR_OP::POOL_MAX || - top == GPUNodeConfiguration::TENSOR_OP::POOL_MEAN || - top == GPUNodeConfiguration::TENSOR_OP::POOL_MIN) && - "Invalid configuration generated for tensor pool wrapper operation"); - - return handleTensorPoolingApproximationTuples(ApproxChoices[0].second, - input_ptr, poolFunction, - window_height, window_width, - vertical_pad, horizontal_pad, - vertical_stride, horizontal_stride); - -} - - -void* wrapper_tensorGroupConvolution(const char* hpvm_node_id, - void* input, void* filter, - int vertical_pad, int horizontal_pad, - int vertical_stride, int horizontal_stride, - int conv_mode, int conv_groups){ - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a group_conv operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::GROUP_CONV && - "Invalid configuration generated for tensor group_conv wrapper operation"); - - return 
handleTensorGroupConvApproximationTuples(ApproxChoices[0].second, - input, filter, - vertical_pad, horizontal_pad, - vertical_stride, horizontal_stride, - conv_mode, conv_groups); - -} - - - -void* wrapper_tensorSoftmax(const char* hpvm_node_id, void* input_ptr){ -// return tensorSoftmax(input_ptr); - - // Only mapped to GPU - get a GPU configuration - GPUNodeConfiguration *GPUConf = - (GPUNodeConfiguration *)RC->getNodeConfiguration(hpvm_node_id); - - std::vector< std::pair< GPUNodeConfiguration::TENSOR_OP, - std::vector< std::pair<GPUNodeConfiguration::APPROX, - int> > > > &ApproxChoices = - GPUConf->getApproxChoices(); - - // Approximation choices must be for a softmax operation - CUSTOM_ASSERT(ApproxChoices.size() == 1 && - ApproxChoices[0].first == GPUNodeConfiguration::TENSOR_OP::SOFTMAX && - "Invalid configuration generated for tensor softmax wrapper operation"); - - return handleTensorSoftmaxApproximationTuples(ApproxChoices[0].second, input_ptr); - - -} - - - diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu deleted file mode 100644 index f6c4fff296debd5bd0f5c5287ee28824b00c1380..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/legacy/wrapper_runtime_back.cu +++ /dev/null @@ -1,123 +0,0 @@ - -#include <stdio.h> -#include <cstdio> -#include <cstdlib> -#include <iostream> -#include <string> -#include <vector> - -#include <cuda_runtime.h> -#include <device_launch_parameters.h> - -#include <cublas_v2.h> -#include <cudnn.h> -#include <cublas_api.h> -#include <cuda_fp16.h> -#include <driver_types.h> - - -// Tensor runtime header files -#include "../include/tensor_runtime.h" -#include "../include/tensor_utils.h" -#include "../include/debug.h" -#include "../include/profiling.h" -#include "../include/fp16_conversion.h" -#include "../include/global_data.h" -#include "../include/error.h" -#include "../include/tensor.h" -#include "../include/op_overheads.h" -#include "../include/half_precision_api.h" - - - -/*********** Generic Layer API **************/ - -void* ConvLayerWrapper(void* input, - void* filter, - void* bias, - int conv_pad_h, int conv_pad_w, - int conv_stride_h, int conv_stride_w, - int pool_id, int pool_size, - int activation_id, - // NOTE: out_min, out_max are only relevant for ClippedRelu - float out_min, float out_max){ - - - void* conv_out = tensorConvolution(input, filter, - conv_pad_h, conv_pad_w, - conv_stride_h, conv_stride_w, - 1, 0); - - void* conv_add = tensorAdd(conv_out, bias); - - void* pool_out; - // NOTE: Skip pooling when pool size is not greater than 0 - if(pool_size > 0){ - //FIXME: Currently only using MaxPooling - pool_out = tensorPooling(conv_add, 0, pool_size, pool_size, 0, 0, pool_size, pool_size); - } - else{ - pool_out = conv_add; - } - - void* activation_out; - switch(activation_id){ - case -1: - activation_out = pool_out; - INFO("NO Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(pool_out); - break; - case 1: - activation_out = tensorRelu(pool_out); - break; - case 2: - activation_out = tensorRelu2(pool_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_out); - break; - } - - return activation_out; -} - - -void* FCLayerWrapper(void* input, - void* weights, - void* bias, - int activation_id, - // NOTE: out_min and out_max are only relevant for ClippedRelu - float out_min, float out_max){ - - - void* gemm_out = 
tensorGemmGPU(input, weights); - - void* gemmbias_out = tensorAdd(gemm_out, bias); - - void* activation_out; - switch(activation_id){ - - case -1: - activation_out = gemmbias_out; - INFO("No Activation Function \n"); - break; - case 0: - activation_out = tensorTanh(gemmbias_out); - break; - case 1: - activation_out = tensorRelu(gemmbias_out); - break; - case 2: - activation_out = tensorRelu2(gemmbias_out, out_min, out_max); - break; - default: - ERROR("Activation id %d NOT supported \n", activation_id); - break; - } - - return activation_out; -} - -
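For reference, below is a minimal sketch of how the legacy generic layer API deleted above (ConvLayerWrapper, FCLayerWrapper, tensorSoftmax) was typically composed into a forward pass. Only the wrapper names and argument order come from the deleted file; forwardPassSketch, the tensor handles, and the concrete padding/stride/pool/activation values are purely illustrative, and the tensors are assumed to have been created and initialized elsewhere with the tensor runtime's allocation/initialization utilities.

// Hypothetical usage sketch -- not part of the deleted sources.
// Assumes the input, filter, weight, and bias tensors were allocated and
// filled elsewhere (e.g. via the tensor runtime's creation helpers).
void* forwardPassSketch(void* input,
                        void* conv1_filter, void* conv1_bias,
                        void* fc1_weights,  void* fc1_bias){

  // Convolution -> bias add -> 2x2 max pool -> tanh (activation_id 0)
  void* conv1 = ConvLayerWrapper(input, conv1_filter, conv1_bias,
                                 /*conv_pad_h=*/2, /*conv_pad_w=*/2,
                                 /*conv_stride_h=*/1, /*conv_stride_w=*/1,
                                 /*pool_id=*/0, /*pool_size=*/2,
                                 /*activation_id=*/0,
                                 /*out_min=*/0.0f, /*out_max=*/0.0f);

  // GEMM -> bias add -> clipped ReLU in [0, 6] (activation_id 2);
  // out_min/out_max are only consulted for the clipped-ReLU case
  void* fc1 = FCLayerWrapper(conv1, fc1_weights, fc1_bias,
                             /*activation_id=*/2,
                             /*out_min=*/0.0f, /*out_max=*/6.0f);

  // Softmax over the channel dimension; tensorSoftmax also copies the
  // result back to the host before returning
  return tensorSoftmax(fc1);
}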