diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py index 895a72d9e90f1a0559716944d598cb410e8d0d9b..5de17ef673be2602a02398663f9165e04ece26d9 100644 --- a/llvm/projects/soc_simulator/src/driver.py +++ b/llvm/projects/soc_simulator/src/driver.py @@ -6,9 +6,9 @@ import sys class Driver: def driver(self): self.parse_tensor_layer_file() - #self.parse_tensor_table() - #self.run_simulations() - #self.display_results() + self.parse_tensor_table() + self.run_simulations() + self.display_results() def __init__(self, layer_filename, table_filename, config_filename, results_filename): self.__layer_filename = layer_filename @@ -21,6 +21,14 @@ class Driver: # since it corresponds to the data in the configuration file self.__tensor_layers = [] + # [layer_name][operation_name][cols] + # Operation names need to be stored in order of insertion + self.__tensor_table = defaultdict(lambda: list(defaultdict(str))) + + # [Time/Energy][number corresponding to order the layer config was read in] = time/energy + self.__aggregate_results = defaultdict(lambda: defaultdict(float)) + self.__config_count = 0 + @staticmethod def is_conv(operation_name): return operation_name.startswith("Conv") @@ -33,12 +41,6 @@ class Driver: def is_fc(operation_name): return operation_name.startswith("FC") - def driver(self): - self.parse_tensor_layer_file() - #self.parse_tensor_table() - #self.run_simulations() - #self.display_results() - def parse_tensor_layer_file(self): if not os.path.isfile(self.__layer_filename): print("ERROR: %s was not found." % self.__layer_filename) @@ -75,234 +77,206 @@ class Driver: self.__tensor_layers.append(tensor_layer) layer_file.close() - ''' -# [layer_name][operation_name][cols] -# Operation names need to be stored in order of insertion - tensor_table = defaultdict(lambda: list(defaultdict(str))) - -def parse_tensor_table(table_filename): - if not os.path.isfile(table_filename): - print("ERROR: %s was not found." % table_filename) - exit(1) - table_file = open(table_filename, "r") - line = table_file.readline().strip() - while line: - # Line here MUST be a header or there's a bug - # Get the description of the layer - assert(line.startswith("**")) + def parse_tensor_table(self): + if not os.path.isfile(self.__table_filename): + print("ERROR: %s was not found." % self.__table_filename) + exit(1) + table_file = open(self.__table_filename, "r") + line = table_file.readline().strip() + + while line: + # Line here MUST be a header or there's a bug + # Get the description of the layer + assert(line.startswith("**")) + + header_contents = line.split(' ')[1:] + layer_name = header_contents[0] + num_ops = int(header_contents[1]) + col_names = header_contents[2:] + + layer_operations = [] - header_contents = line.split(' ')[1:] - layer_name = header_contents[0] - num_ops = int(header_contents[1]) - col_names = header_contents[2:] + # Go through all operations in the layer + for op_count in range(num_ops): + operation_data = defaultdict(str) - layer_operations = [] + line = table_file.readline().strip() + op_data = line.split(' ') + op_name = op_data[0] + operation_data["Name"] = op_name - # Go through all operations in the layer - for op_count in range(num_ops): - operation_data = defaultdict(str) + # Number of data items (#s) needs to match up with the # of cols + assert(len(op_data) - 1 == len(col_names)) + # Go through all data items (each col element) per operation + for i in range(len(col_names)): + operation_data[col_names[i]] = float(op_data[i + 1]) + + layer_operations.append(operation_data) + + self.__tensor_table[layer_name] = layer_operations line = table_file.readline().strip() - op_data = line.split(' ') - op_name = op_data[0] - operation_data["Name"] = op_name + table_file.close() - # Number of data items (#s) needs to match up with the # of cols - assert(len(op_data) - 1 == len(col_names)) + fp16_swing = 8 - # Go through all data items (each col element) per operation - for i in range(len(col_names)): - operation_data[col_names[i]] = float(op_data[i + 1]) + class ApproxTypes: + FP16 = 0 + FP32 = 1 + PROMISE = 2 - layer_operations.append(operation_data) + @staticmethod + def is_promise(config_layer): + return float(config_layer.split(' ')[0]) < Driver.fp16_swing + + def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \ + or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed + return 0.0, 0.0 + + layer_name = layer_data["Name"] + + # NOTE: Ignoring logic where curr == promise or prev == promise bc + # smartDMA is always true so we'd return near the beginning of the method + + # Get h2f/f2h data using the first tensor operation in the layer + # (which is why order matters in the tensor table) + tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind] + if curr_layer == Driver.ApproxTypes.FP32: + time = tensor_op_row["h2f_time"] + energy = tensor_op_row["h2f_energy"] + elif curr_layer == Driver.ApproxTypes.FP16: + time = tensor_op_row["f2h_time"] + energy = tensor_op_row["f2h_energy"] + + print("Quantization: (%f, %f)" % (time, energy)) + return (time, energy) + + def __run_promise_simulation(self, swing, layer_data): + layer_name = layer_data["Name"] + patch_factor = 1 + + if Driver.is_conv(layer_name): + rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ + / (layer_data["Sh"] * layer_data["Sw"]) + cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] + rows_b = cols_a + cols_b = layer_data["Cout"] + patch_factor = layer_data["Kh"] * layer_data["Kw"] + elif Driver.is_fc(layer_name): + rows_a = layer_data["RA"] + cols_a = layer_data["CA"] + rows_b = cols_a + cols_b = layer_data["CB"] + else: + print("PROMISE can't run whatever this layer is.") + exit(1) + # Run promise simulator + # TODO need to print time and energy in the ptm runner so we can pipe it + output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ + str(cols_b), str(patch_factor), str(swing)], \ + stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] + total_time_energy = output.strip().split(',') + + assert(len(total_time_energy) == 2) + print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1])) + return float(total_time_energy[0]), float(total_time_energy[1]) + + def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind): + tensor_info = self.__tensor_table[layer_name][tensor_ind] + if curr_layer == Driver.ApproxTypes.FP32: + conversion_time = tensor_info["fp32_time"] + conversion_energy = tensor_info["fp32_energy"] + else: + conversion_time = tensor_info["fp16_time"] + conversion_energy = tensor_info["fp16_energy"] + print("GPU: (%f, %f)" % (conversion_time, conversion_energy)) + return (conversion_time, conversion_energy) + + # Default dict of default dicts + results_time_key = "Time" + results_energy_key = "Energy" + + def run_simulations(self): + print("run sim") + if not os.path.isfile(self.__config_filename): + print("ERROR: %s was not found" % self.__config_filename) + exit(1) - tensor_table[layer_name] = layer_operations - line = table_file.readline().strip() - table_file.close() - -fp16_swing = 8 - -class ApproxTypes: - FP16 = 0 - FP32 = 1 - PROMISE = 2 - -def get_approx_type(approx_type): - if approx_type == 0: - return "fp16" - elif approx_type == 1: - return "fp32" - return "promise" - -def Driver.is_promise(config_layer): - # TODO overhead in call to split? - return float(config_layer.split(' ')[0]) < fp16_swing - - -def quantize(curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): - #print(get_approx_type(curr_layer), get_approx_type(prev_layer)) - if curr_layer == prev_layer or curr_layer == ApproxTypes.PROMISE \ - or prev_layer == ApproxTypes.PROMISE: # No quantization needed - return 0.0, 0.0 - - layer_name = layer_data["Name"] - - # NOTE: Ignoring logic where curr == promise or prev == promise bc - # smartDMA is always true so we'd return near the beginning of the method - - # Get h2f/f2h data using the first tensor operation in the layer - # (which is why order matters in the tensor table) - tensor_op_row = tensor_table[layer_name][h2f_f2h_operation_ind] - if curr_layer == ApproxTypes.FP32: - time = tensor_op_row["h2f_time"] - energy = tensor_op_row["h2f_energy"] - elif curr_layer == ApproxTypes.FP16: - time = tensor_op_row["f2h_time"] - energy = tensor_op_row["f2h_energy"] - - print("Quantization: (%f, %f)" % (time, energy)) - return (time, energy) - -def run_promise_simulation(swing, layer_data): - layer_name = layer_data["Name"] - patch_factor = 1 - - if Driver.is_conv(layer_name): - rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ - / (layer_data["Sh"] * layer_data["Sw"]) - cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] - rows_b = cols_a - cols_b = layer_data["Cout"] - patch_factor = layer_data["Kh"] * layer_data["Kw"] - elif Driver.is_fc(layer_name): - rows_a = layer_data["RA"] - cols_a = layer_data["CA"] - rows_b = cols_a - cols_b = layer_data["CB"] - else: - print("PROMISE can't run whatever this layer is.") - exit(1) - #print("[%f x %f] x [%f x %f] : %f" % (rows_a, cols_a, rows_b, cols_b, swing)) - # Run promise simulator - # TODO need to print time and energy in the ptm runner so we can pipe it - output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ - str(cols_b), str(patch_factor), str(swing)], \ - stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] - total_time_energy = output.strip().split(',') - - assert(len(total_time_energy) == 2) - print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1])) - return float(total_time_energy[0]), float(total_time_energy[1]) - - -def run_gpu_simulation(curr_layer, layer_name, tensor_ind): - tensor_info = tensor_table[layer_name][tensor_ind] - if curr_layer == ApproxTypes.FP32: - conversion_time = tensor_info["fp32_time"] - conversion_energy = tensor_info["fp32_energy"] - else: - conversion_time = tensor_info["fp16_time"] - conversion_energy = tensor_info["fp16_energy"] - print("GPU: (%f, %f)" % (conversion_time, conversion_energy)) - return (conversion_time, conversion_energy) - -# Default dict of default dicts -results_time_key = "Time" -results_energy_key = "Energy" -# [Time/Energy][number corresponding to order the layer config was read in] = time/energy -aggregate_results = defaultdict(lambda: defaultdict(float)) -config_count = 0 - -def run_simulations(config_filename): - global config_count - - if not os.path.isfile(config_filename): - print("ERROR: %s was not found" % config_filename) - exit(1) + config_file = open(self.__config_filename, "r") + + # each line = indepedent configuration + # layers are separated by commas + # tensor ops are separated by spaces + for config in config_file: + config_layers = config.strip().split(',') + prev_layer = Driver.ApproxTypes.FP32 + curr_layer = None + + for layer_ind, config_layer in enumerate(config_layers): # level + layer_data = self.__tensor_layers[layer_ind] # layer + layer_name = layer_data["Name"] + + if Driver.is_promise(config_layer): + print("Running layer %s on PROMISE" % layer_name) + curr_layer = Driver.ApproxTypes.PROMISE + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data) + # Compute + time, energy = self.__run_promise_simulation(config_layer, layer_data) + print(time, energy) + self.__aggregate_results[Driver.results_time_key][self.__config_count] += time + self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy + else: + print("Running layer %s on the GPU" % layer_name) + tensor_ops = config_layer.split(' ') + + total_time = 0 + total_energy = 0 + for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevle + tensor_op = int(tensor_op) + if tensor_op == Driver.fp16_swing: + curr_layer = Driver.ApproxTypes.FP16 + else: + curr_layer = Driver.ApproxTypes.FP32 + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data) + conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind) + total_time += quant_time + conv_time + total_energy += quant_energy + conv_energy + + self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time + self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy + + prev_layer = curr_layer + self.__config_count += 1 + print("\n") + config_file.close() + + def display_results(self): + results_file = open(self.__results_filename, "w") + attributes_to_print = [Driver.results_time_key, Driver.results_energy_key] + + for attribute in attributes_to_print: + results_file.write("%s\n" % attribute) + results_file.write("Configuration,Total,Improvement\n") + + baseline_val = self.__aggregate_results[attribute][0] + print(baseline_val) + best_config = None + best_result = None + + for config_ind in range(self.__config_count): + results_file.write("c%d" % config_ind) + time_or_energy_val = self.__aggregate_results[attribute][config_ind] + results_file.write(",%f" % time_or_energy_val) + results_file.write(",%f\n" % (baseline_val / (time_or_energy_val + 0.0001))) + + if not best_result or time_or_energy_val < best_result: + best_result = time_or_energy_val + best_config = config_ind + results_file.write("\nc%d,%f\n\n" % (best_config, self.__aggregate_results[attribute][best_config])) + results_file.close() - config_file = open(config_filename, "r") - - # each line = indepedent configuration - # layers are separated by commas - # tensor ops are separated by spaces - for config in config_file: - config_layers = config.strip().split(',') - prev_layer = ApproxTypes.FP32 - curr_layer = None - - for layer_ind, config_layer in enumerate(config_layers): # level - layer_data = tensor_layers[layer_ind] # layer - layer_name = layer_data["Name"] - - if Driver.is_promise(config_layer): - print("Running layer %s on PROMISE" % layer_name) - curr_layer = ApproxTypes.PROMISE - quant_time, quant_energy = quantize(curr_layer, prev_layer, 0, layer_data) - # Compute - time, energy = run_promise_simulation(config_layer, layer_data) - print(time, energy) - aggregate_results[results_time_key][config_count] += time - aggregate_results[results_energy_key][config_count] += energy - else: - print("Running layer %s on the GPU" % layer_name) - tensor_ops = config_layer.split(' ') - - total_time = 0 - total_energy = 0 - for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevle - tensor_op = int(tensor_op) - if tensor_op == fp16_swing: - curr_layer = ApproxTypes.FP16 - else: - curr_layer = ApproxTypes.FP32 - quant_time, quant_energy = quantize(curr_layer, prev_layer, tensor_ind, \ - layer_data) - conv_time, conv_energy = run_gpu_simulation(curr_layer, layer_name, tensor_ind) - total_time += quant_time + conv_time - total_energy += quant_energy + conv_energy - - aggregate_results[results_time_key][config_count] += total_time - aggregate_results[results_energy_key][config_count] += total_energy - - prev_layer = curr_layer - config_count += 1 - print("\n") - config_file.close() - - -def display_results(results_filename): - results_file = open(results_filename, "w") - attributes_to_print = [results_time_key, results_energy_key] - - for attribute in attributes_to_print: - results_file.write("%s\n" % attribute) - results_file.write("Configuration,Total,Improvement\n") - - baseline_val = aggregate_results[attribute][0] - print(baseline_val) - best_config = None - best_result = None - - for config_ind in range(config_count): - results_file.write("c%d" % config_ind) - time_or_energy_val = aggregate_results[attribute][config_ind] - results_file.write(",%f" % time_or_energy_val) - results_file.write(",%f\n" % (baseline_val / (time_or_energy_val + 0.0001))) - - if not best_result or time_or_energy_val < best_result: - best_result = time_or_energy_val - best_config = config_ind - results_file.write("\nc%d,%f\n\n" % (best_config, aggregate_results[attribute][best_config])) - results_file.close() - -def driver(tensor_layers_file, tensor_table_file, conf_file, output_file): - print(tensor_layers_file, tensor_table_file, conf_file, output_file) - parse_tensor_layer_file(tensor_layers_file) - parse_tensor_table(tensor_table_file) - run_simulations(conf_file) - display_results(output_file) -''' if __name__ == "__main__": if len(sys.argv) != 5: print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")