diff --git a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py index 36247ee173657b9b3b229638d6155dab14f0577f..2c57eaf5be7c09a05859221535a7aff709330fcf 100644 --- a/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py +++ b/llvm/projects/hpvm-tensor-rt/build_pldi/table_generator.py @@ -2,9 +2,21 @@ import glob import os import subprocess import shutil +import sys from collections import defaultdict +''' +FORMAT + +** LayerName NumOpsInLayer <cols> +OpName Col1Val Col2Val ... + +** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy f2h_time f2h_energy fp16_perf_time fp16_perf_energy fp16_time fp16_energy +Conv1 51.8808 97.2844 319.582 601.966 12.81 18.758 388.092 650.649 340.037 590.664 + +''' + class TableGenerator: __ops_header_delimiter = "#" @@ -53,7 +65,7 @@ class TableGenerator: 3. Writes the internal table to <network_name>_tensors.txt file and uses the <network_name>_ops.txt file as a guideline in terms of row order ''' - self.__run_inputted_binaries() + #self.__run_inputted_binaries() self.__build_internal_table() self.__output_table_to_file() @@ -282,8 +294,11 @@ class TableGenerator: if __name__ == "__main__": - binary_dir_path = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet" - num_iters = 1 - profiler_binary_name = "/home/nvidia/awesome_profiler/pp" + if len(sys.argv) != 4: + print("python table_generator.py <binary dir path> <num itrs> <profiler bin path>") + exit(1) + binary_dir_path = sys.argv[1] + num_iters = int(sys.argv[2]) + profiler_binary_name = sys.argv[3] table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name) table_gen.generate_table() diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py index c747f47d998edaef603a4fc4f38fccb9a4207ea6..1df46eec8fc34cee7c6a7683d1faaae4a94639ca 100644 --- a/llvm/projects/soc_simulator/src/driver.py +++ 
from collections import defaultdict
import os
import subprocess
import sys


class Driver:
    '''
    Computes the total time and energy of every configuration listed in a
    configuration file and writes a per-configuration summary to a results
    file.

    Inputs (all given as file names to the constructor):

    Layer file: one comma-separated row per layer; the first field is the
        layer name. Names starting with "Conv" and "FC" carry numeric
        fields (see _conv_columns / _fc_columns); names starting with "NML"
        carry no data that is read here.

    Tensor table: one section per layer:
            ** LayerName NumOpsInLayer <col names>
            OpName Col1Val Col2Val ...
        Fields are whitespace separated and every column value is parsed
        as a float.

    Configuration file: one configuration per line. Layers are separated by
        commas, tensor ops inside a layer by spaces. The n-th layer entry
        is matched with the n-th row of the layer file.
    '''

    # Swing value that selects FP16 on the GPU. A layer whose first swing
    # value is strictly below this runs on PROMISE (see is_promise).
    fp16_swing = 8

    class ApproxTypes:
        FP16 = 0
        FP32 = 1
        PROMISE = 2

    results_time_key = "Time"
    results_energy_key = "Energy"

    # (key, column index) pairs read from a Conv row of the layer file.
    # NOTE(review): column 6 is never read -- presumably it holds a value
    # that is not needed here; confirm against the layer file format.
    _conv_columns = (("N", 1), ("Cin", 2), ("H", 3), ("W", 4), ("Cout", 5),
                     ("Kh", 7), ("Kw", 8), ("Sh", 9), ("Sw", 10))

    # (key, column index) pairs read from an FC row of the layer file.
    _fc_columns = (("RA", 1), ("CA", 2), ("RB", 3), ("CB", 4))

    def driver(self):
        '''
        Runs the whole pipeline: parses the layer file and the tensor
        table, evaluates every configuration, then writes the results file.
        '''
        self.__parse_tensor_layer_file()
        self.__parse_tensor_table()
        self.__run_simulations()
        self.__display_results()

    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
        '''
        Stores the four file names; no file is touched until driver() runs.
        '''
        self.__layer_filename = layer_filename
        self.__table_filename = table_filename
        self.__config_filename = config_filename
        self.__results_filename = results_filename

        # NOTE: Use an OrderedDict if we want to search by operation name.
        # Using a list because we care about the order the data is read in,
        # since it corresponds to the order of the layers in each line of
        # the configuration file.
        self.__tensor_layers = []

        # [layer_name][index of the operation within the layer][col name]
        # Operations are kept in a list so they stay in file order.
        self.__tensor_table = defaultdict(list)

        # [Time/Energy][index of the configuration in the config file] = total
        self.__aggregate_results = defaultdict(lambda: defaultdict(float))
        self.__config_count = 0

    @staticmethod
    def is_conv(operation_name):
        '''Returns True if the name starts with "Conv".'''
        return operation_name.startswith("Conv")

    @staticmethod
    def is_nml(operation_name):
        '''Returns True if the name starts with "NML".'''
        return operation_name.startswith("NML")

    @staticmethod
    def is_fc(operation_name):
        '''Returns True if the name starts with "FC".'''
        return operation_name.startswith("FC")

    def __parse_tensor_layer_file(self):
        '''
        Reads the layer file and appends one dict per layer (in file order)
        to the internal layer list. Blank lines are skipped.

        Prints an error and exits with status 1 if the file is missing, a
        layer name is not a Conv/FC/NML name, or a row has too few fields.
        '''
        if not os.path.isfile(self.__layer_filename):
            print("ERROR: %s was not found." % self.__layer_filename)
            sys.exit(1)

        with open(self.__layer_filename, "r") as layer_file:
            for line in layer_file:
                line = line.strip()
                if not line:
                    continue

                layer_data = line.split(',')
                layer_name = layer_data[0]

                tensor_layer = defaultdict(str)
                tensor_layer["Name"] = layer_name

                if Driver.is_conv(layer_name):
                    columns = Driver._conv_columns
                elif Driver.is_fc(layer_name):
                    columns = Driver._fc_columns
                elif Driver.is_nml(layer_name):  # TODO should we store data for NMLs?
                    columns = ()
                else:
                    print("ERROR: Invalid layer name %s" % layer_name)
                    sys.exit(1)

                # The column tables are sorted by index, so the last entry
                # tells us how many fields the row must have.
                if columns and len(layer_data) <= columns[-1][1]:
                    print("ERROR: Layer %s has %d fields; at least %d are required"
                          % (layer_name, len(layer_data), columns[-1][1] + 1))
                    sys.exit(1)

                for key, column in columns:
                    tensor_layer[key] = float(layer_data[column])

                self.__tensor_layers.append(tensor_layer)

    def __parse_tensor_table(self):
        '''
        Reads the tensor table into [layer_name] -> list of per-operation
        dicts (column name -> float, plus "Name"), preserving file order.
        Blank lines between layer sections are skipped.

        Prints an error and exits with status 1 if the file is missing.
        Raises ValueError if the file does not follow the expected layout.
        '''
        if not os.path.isfile(self.__table_filename):
            print("ERROR: %s was not found." % self.__table_filename)
            sys.exit(1)

        with open(self.__table_filename, "r") as table_file:
            rows = (row.strip() for row in table_file)

            for line in rows:
                if not line:
                    continue

                # Any non-blank line reached here MUST be a layer header
                if not line.startswith("**"):
                    raise ValueError("Expected a '**' layer header in %s but found: %r"
                                     % (self.__table_filename, line))

                header_contents = line.split()[1:]
                if len(header_contents) < 2:
                    raise ValueError("Malformed layer header: %r" % line)
                layer_name = header_contents[0]
                num_ops = int(header_contents[1])
                col_names = header_contents[2:]

                layer_operations = []

                # The header is followed by exactly num_ops operation rows
                for _ in range(num_ops):
                    op_line = next(rows, None)
                    if op_line is None:
                        raise ValueError("Layer %s declares %d operations but the file ended early"
                                         % (layer_name, num_ops))
                    op_data = op_line.split()

                    # Number of data items needs to match the number of cols
                    if len(op_data) - 1 != len(col_names):
                        raise ValueError("Layer %s: expected %d values but found: %r"
                                         % (layer_name, len(col_names), op_line))

                    operation_data = defaultdict(str)
                    operation_data["Name"] = op_data[0]
                    for col_name, value in zip(col_names, op_data[1:]):
                        operation_data[col_name] = float(value)

                    layer_operations.append(operation_data)

                self.__tensor_table[layer_name] = layer_operations

    @staticmethod
    def is_promise(config_layer):
        '''
        Returns True if the first swing value of a configuration layer
        entry is below fp16_swing, i.e. the layer runs on PROMISE.
        '''
        return float(config_layer.split()[0]) < Driver.fp16_swing

    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
        '''
        Returns the (time, energy) cost of switching from prev_layer's
        approximation type to curr_layer's.

        No cost is charged when the two types are equal or when either one
        is PROMISE. Otherwise the cost is read from the tensor table row at
        index h2f_f2h_operation_ind of the layer: the h2f_* columns when
        the current type is FP32 and the f2h_* columns when it is FP16
        (presumably half-to-float and float-to-half -- confirm with the
        table generator).

        Raises ValueError for an unknown approximation type.
        '''
        if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \
                or prev_layer == Driver.ApproxTypes.PROMISE:  # No quantization needed
            return 0.0, 0.0

        layer_name = layer_data["Name"]
        tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]

        if curr_layer == Driver.ApproxTypes.FP32:
            time = tensor_op_row["h2f_time"]
            energy = tensor_op_row["h2f_energy"]
        elif curr_layer == Driver.ApproxTypes.FP16:
            time = tensor_op_row["f2h_time"]
            energy = tensor_op_row["f2h_energy"]
        else:
            raise ValueError("Unknown approximation type: %r" % (curr_layer,))

        print("Quantization: (%f, %f)" % (time, energy))
        return (time, energy)

    def __run_promise_simulation(self, swing, layer_data):
        '''
        Runs the external ./ptm program for one Conv or FC layer and
        returns the (time, energy) pair it prints as "<time>,<energy>".

        Prints an error and exits with status 1 for any other layer kind.
        Raises RuntimeError if the program's output is not two
        comma-separated fields.
        '''
        layer_name = layer_data["Name"]
        patch_factor = 1

        if Driver.is_conv(layer_name):
            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
                    / (layer_data["Sh"] * layer_data["Sw"])
            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
            rows_b = cols_a
            cols_b = layer_data["Cout"]
            patch_factor = layer_data["Kh"] * layer_data["Kw"]
        elif Driver.is_fc(layer_name):
            rows_a = layer_data["RA"]
            cols_a = layer_data["CA"]
            rows_b = cols_a
            cols_b = layer_data["CB"]
        else:
            print("PROMISE can't run whatever this layer is.")
            sys.exit(1)

        # Run promise simulator
        # TODO need to print time and energy in the ptm runner so we can pipe it
        # universal_newlines=True makes communicate() return text rather
        # than bytes, so the split below works on Python 2 and Python 3.
        ptm = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b),
                                str(cols_b), str(patch_factor), str(swing)],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               universal_newlines=True)
        output, errors = ptm.communicate()
        total_time_energy = output.strip().split(',')

        if len(total_time_energy) != 2:
            raise RuntimeError("Unexpected ./ptm output %r (stderr: %r)" % (output, errors))

        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
        return float(total_time_energy[0]), float(total_time_energy[1])

    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind):
        '''
        Looks up the (time, energy) of one tensor operation in the tensor
        table: the fp32_* columns for FP32, the fp16_* columns otherwise.
        '''
        tensor_info = self.__tensor_table[layer_name][tensor_ind]
        if curr_layer == Driver.ApproxTypes.FP32:
            op_time = tensor_info["fp32_time"]
            op_energy = tensor_info["fp32_energy"]
        else:
            op_time = tensor_info["fp16_time"]
            op_energy = tensor_info["fp16_energy"]
        print("GPU: (%f, %f)" % (op_time, op_energy))
        return (op_time, op_energy)

    def __run_simulations(self):
        '''
        Evaluates every non-blank line of the configuration file and
        accumulates its total time and energy under the configuration's
        index (0 for the first configuration read).

        Prints an error and exits with status 1 if the file is missing.
        Raises ValueError if a configuration lists more layers than the
        layer file.
        '''
        print("run sim")
        if not os.path.isfile(self.__config_filename):
            print("ERROR: %s was not found" % self.__config_filename)
            sys.exit(1)

        time_totals = self.__aggregate_results[Driver.results_time_key]
        energy_totals = self.__aggregate_results[Driver.results_energy_key]

        with open(self.__config_filename, "r") as config_file:
            # each line = independent configuration
            # layers are separated by commas
            # tensor ops are separated by spaces
            for config in config_file:
                config = config.strip()
                if not config:
                    continue

                config_layers = config.split(',')
                if len(config_layers) > len(self.__tensor_layers):
                    raise ValueError("Configuration %d has %d layers but the layer file has only %d"
                                     % (self.__config_count, len(config_layers),
                                        len(self.__tensor_layers)))

                # Every configuration starts out in FP32
                prev_layer = Driver.ApproxTypes.FP32
                curr_layer = None

                for layer_ind, config_layer in enumerate(config_layers):  # level
                    layer_data = self.__tensor_layers[layer_ind]  # layer
                    layer_name = layer_data["Name"]

                    if Driver.is_promise(config_layer):
                        print("Running layer %s on PROMISE" % layer_name)
                        curr_layer = Driver.ApproxTypes.PROMISE
                        # Always (0.0, 0.0) when PROMISE is involved; added
                        # for symmetry with the GPU path.
                        quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data)
                        # Compute
                        time, energy = self.__run_promise_simulation(config_layer, layer_data)
                        print(time, energy)
                        total_time = quant_time + time
                        total_energy = quant_energy + energy
                    else:
                        print("Running layer %s on the GPU" % layer_name)
                        total_time = 0
                        total_energy = 0

                        # NOTE(review): prev_layer is only updated once per
                        # layer, so every op in this layer is quantized
                        # relative to the previous layer -- confirm this is
                        # the intended model.
                        for tensor_ind, tensor_op in enumerate(config_layer.split()):  # sublevel
                            if int(tensor_op) == Driver.fp16_swing:
                                curr_layer = Driver.ApproxTypes.FP16
                            else:
                                curr_layer = Driver.ApproxTypes.FP32
                            quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data)
                            conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind)
                            total_time += quant_time + conv_time
                            total_energy += quant_energy + conv_energy

                    time_totals[self.__config_count] += total_time
                    energy_totals[self.__config_count] += total_energy

                    prev_layer = curr_layer

                self.__config_count += 1
                print("\n")

    def __display_results(self):
        '''
        Writes one section per attribute (Time, then Energy) to the results
        file: a CSV-style row per configuration with its total and its
        improvement over configuration 0, followed by the configuration
        with the smallest total. With no configurations, the row list is
        empty and no best configuration is written.
        '''
        attributes_to_print = [Driver.results_time_key, Driver.results_energy_key]

        with open(self.__results_filename, "w") as results_file:
            for attribute in attributes_to_print:
                results_file.write("%s\n" % attribute)
                results_file.write("Configuration,Total,Improvement\n")

                # Configuration 0 is the baseline for the improvement ratio
                baseline_val = self.__aggregate_results[attribute][0]
                print(baseline_val)
                best_config = None
                best_result = None

                for config_ind in range(self.__config_count):
                    time_or_energy_val = self.__aggregate_results[attribute][config_ind]

                    # Using repr to keep all decimal digits when writing to file.
                    # The 0.0001 keeps the ratio defined when the total is 0.
                    results_file.write("c%d,%s,%s\n" % (
                        config_ind,
                        repr(time_or_energy_val),
                        repr(baseline_val / (time_or_energy_val + 0.0001))))

                    # Compare against None explicitly: a total of 0.0 is a
                    # valid (and unbeatable) best result.
                    if best_result is None or time_or_energy_val < best_result:
                        best_result = time_or_energy_val
                        best_config = config_ind

                if best_config is not None:
                    results_file.write("\nc%d,%s\n\n" % (best_config, repr(best_result)))


if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
        sys.exit(1)

    Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()