diff --git a/llvm/projects/soc_simulator/src/driver_new_config_v2.py b/llvm/projects/soc_simulator/src/driver_new_config_v2.py
index 7e3a91d309e582945f56fa05375e7ae35ddb81c4..ad60768cdd27606bb5f9d3a8bf030b9118e92cc1 100644
--- a/llvm/projects/soc_simulator/src/driver_new_config_v2.py
+++ b/llvm/projects/soc_simulator/src/driver_new_config_v2.py
@@ -34,7 +34,8 @@ class Driver:
 
         # Operation names need to be stored in order of insertion
         self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
-        self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
+        self.__conf_results = [] # indexed
+        #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
 
 
     @staticmethod
@@ -77,7 +78,6 @@ class Driver:
         if not os.path.isfile(self.__layer_filename):
             print("ERROR: %s was not found." % self.__layer_filename)
             exit(1)
-
         layer_file = open(self.__layer_filename, "r")
         for line in layer_file:
             layer_data = line.strip().split(',')
@@ -139,14 +139,10 @@ class Driver:
                 operation_data["Name"] = op_name
 
                 # Number of data items (#s) needs to match up with the # of cols
-                #print(len(op_data) - 1, len(col_names))
-                #print(op_data)
-                #print(col_names)
-                #assert(len(op_data) - 1 == len(col_names))
+                assert(len(op_data) - 1 == len(col_names))
 
                 # Go through all data items (each col element) per operation
                 for i in range(len(col_names)):
-                    #print(col_names[i], float(op_data[i + 1]))
                     operation_data[col_names[i]] = float(op_data[i + 1])
 
                 layer_operations.append(operation_data)
@@ -180,21 +176,24 @@ class Driver:
             line = config_file.readline().strip()
             first_line = line
             conf_name = line.split(' ')[0]
+            print("CONF NAME: %s" % conf_name)
             assert(conf_name.startswith("conf"))
 
             line = config_file.readline().strip()
             while line != "-----":
+                layer_as_lst = line.split(' ')
+                layer_results = []
                 # Skip softmax
                 if line.find("softmax") != -1:
+                    layer_results.append((0, 0, ' '.join(layer_as_lst[2:])))
+                    curr_conf_results.append((layer_as_lst[1], layer_results))
                     line = config_file.readline().strip()
                     continue
 
-                layer_as_lst = line.split(' ')
                 layer_ind = int(layer_as_lst[0]) - 1
                 layer_table_data = self.__tensor_layers[layer_ind]
                 layer_name = layer_table_data["Name"]
-                layer_results = []
 
                 if Driver.is_promise(layer_as_lst[1]):
                     print("Running layer %s on PROMISE" % layer_name)
                     curr_layer = Driver.PrecisionTypes.PROMISE
@@ -209,13 +208,11 @@ class Driver:
                         time, energy = self.__run_promise_simulation(param_val, layer_table_data)
                         total_time += time
                         total_energy += energy
 
-                    layer_results.append((total_time, total_energy, ' '.join(layer_as_lst[1:])))
+                    layer_results.append((total_time, total_energy, ' '.join(layer_as_lst[2:])))
 
                 elif Driver.is_gpu(layer_as_lst[1]):
                     print("Running layer %s on the GPU" % layer_name)
-                    total_time = 0
-                    total_energy = 0
                     tensor_count = 0
 
                     # 3 elements per tensor operation
@@ -225,45 +222,66 @@ class Driver:
                         op_number = layer_as_lst[i + 2]
 
                         approx_type = None
-
-                        if precision_type == "fp16" or line.find("fp16") != -1:
+                        if line.find("fp16") != -1:
                             curr_layer = Driver.PrecisionTypes.FP16
-                        elif precision_type == "fp32" or line.find("fp32") != -1:
+                        elif line.find("fp32") != -1:
                             curr_layer = Driver.PrecisionTypes.FP32
-                        elif precision_type == "perf" or precision_type == "samp": # Handle approx type
+
+                        if precision_type == "perf" or precision_type == "samp": # Handle approx type
                             if precision_type == "perf":
                                 approx_type = Driver.ApproxTypes.PERF
                             elif precision_type == "samp":
                                 approx_type = Driver.ApproxTypes.SAMP
-                            if line.find("fp16") != -1:
-                                curr_layer = Driver.PrecisionTypes.FP16
-                            elif line.find("fp32") != -1:
-                                curr_layer = Driver.PrecisionTypes.FP32
-                        quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, \
-                                        tensor_count, layer_table_data)
+                        quant_time, quant_energy = self.__quantize(precision_type, op_number, curr_layer, prev_layer, tensor_count, layer_table_data)
+                        if quant_time != 0:
+                            assert i == 2 and layer_ind == 0
                         conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \
                                         tensor_count, approx_type, op_number)
                         layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3])))
-                        total_time += quant_time + conv_time
-                        total_energy += quant_energy + conv_energy
                         prev_layer = curr_layer
                         tensor_count += 1
 
                 line = config_file.readline().strip()
                 prev_layer = curr_layer
-            curr_conf_results.append(layer_results)
-
-            self.__conf_results[conf_name] = (first_line, curr_conf_results)
+            curr_conf_results.append((layer_as_lst[1], layer_results))
+
+            # artificially generate the fp16 baseline
+            if not self.__conf_results: # we're appending the baseline
+
+                # need to generate an artificial fp16 baseline
+                self.fp16_baseline = []
+                for layer_ind, (hardware, layer) in enumerate(curr_conf_results):
+                    if len(layer) == 1 and layer[0][2].find("softmax") != -1: continue
+                    fp16_layer = []
+                    print(layer_ind, hardware, layer)
+                    layer_table_data = self.__tensor_layers[layer_ind]
+                    layer_name = layer_table_data["Name"]
+
+                    for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
+                        # for each operation --> include quantization time
+                        quant_time, quant_energy = 0,0
+                        if layer_ind == 0:
+                            quant_time, quant_energy = self.__quantize("fp16", "1", Driver.PrecisionTypes.FP16, None, 0, layer_table_data)
+                            print("FP16 QUANT: ", quant_time, quant_energy)
+                        tensor_info = self.__tensor_table[layer_name][tensor_ind]
+                        fp16_time = tensor_info["fp16_time"] + quant_time
+                        fp16_energy = tensor_info["fp16_energy"] + quant_energy
+                        fp16_layer.append((fp16_time, fp16_energy, tensor_op.replace("fp32", "fp16")))
+                    self.fp16_baseline.append((hardware, fp16_layer))
+                print(self.fp16_baseline)
+            self.__conf_results.append( (first_line, curr_conf_results) )
 
             line = config_file.readline().strip()
         config_file.close()
-        #print("AGGREGATE RESULTS", self.__aggregate_results)
 
-    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
+
+    def __quantize(self, precision_type, op_number, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
         if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \
                     or prev_layer == Driver.PrecisionTypes.PROMISE:
             return 0.0, 0.0
-
         layer_name = layer_data["Name"]
 
         # NOTE: Ignoring logic where curr == promise or prev == promise bc
@@ -275,15 +293,19 @@ class Driver:
         time_key = None
         energy_key = None
 
+        if op_number == "1":
+            lookup_key = "_" #lookup_key = precision_type
+        else:
+            lookup_key = "_" + precision_type + str(op_number) + "_"
+
         if curr_layer == Driver.PrecisionTypes.FP32:
-            time_key = "h2f_time"
-            energy_key = "h2f_energy"
+            time_key = "h2f%stime" % lookup_key
+            energy_key = "h2f%senergy" % lookup_key
         elif curr_layer == Driver.PrecisionTypes.FP16:
-            time_key = "f2h_time"
-            energy_key = "f2h_energy"
+            time_key = "f2h%stime" % lookup_key
+            energy_key = "f2h%senergy" % lookup_key
 
         time = tensor_op_row[time_key]
         energy = tensor_op_row[energy_key]
-        print(time_key, energy_key, time, energy)
         print("Quantization: (%f, %f)" % (time, energy))
         return (time, energy)
@@ -315,14 +337,15 @@ class Driver:
 
         total_time_energy = output.strip().split(',')
 
         assert(len(total_time_energy) == 2)
-        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
         return float(total_time_energy[0]), float(total_time_energy[1])
 
 
     def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \
                 approx_type = None, knob_number = None):
         tensor_info = self.__tensor_table[layer_name][tensor_ind]
-
+        #print(tensor_info)
+        #print(layer_name)
+        #print(tensor_ind)
         time_key = None
         energy_key = None
@@ -334,12 +357,10 @@ class Driver:
                 approx_type_str = "samp"
 
             if curr_layer == Driver.PrecisionTypes.FP32:
-                print("in fp32", approx_type_str)
                 time_key = "fp32_%s%s_time" % (approx_type_str, knob_number)
                 energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number)
 
             elif curr_layer == Driver.PrecisionTypes.FP16:
-                print("in fp16", approx_type_str)
                 time_key = "fp16_%s%s_time" % (approx_type_str, knob_number)
                 energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number)
 
@@ -351,11 +372,10 @@ class Driver:
             elif curr_layer == Driver.PrecisionTypes.FP16:
                 time_key = "fp16_time"
                 energy_key = "fp16_energy"
 
-        #print(time_key, energy_key)
+        print(time_key, energy_key)
         conversion_time = tensor_info[time_key]
         conversion_energy = tensor_info[energy_key]
-        #print(conversion_time, conversion_energy)
-        print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
+        #print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy))
 
         return conversion_time, conversion_energy
@@ -375,12 +395,13 @@ class Driver:
             new_header = [conf_name]
             new_header.append(repr(time_speedup))
             new_header.append(repr(energy_speedup))
-            new_header.append(first_line_lst[-1])
-            new_header.append(first_line_lst[-2])
+            new_header.append(repr(abs(float(first_line_lst[-2]))))
+            new_header.append(repr(abs(float(first_line_lst[-1]))))
             conf_str.append(' '.join(new_header))
-
-            for ind, layer in enumerate(layers):
+
+            for ind, (hardware, layer) in enumerate(layers):
                 layer_lst = [str(ind + 1)]
+                layer_lst.append(hardware)
                 for op_time, op_energy, tensor_op in layer:
                     layer_lst.append(tensor_op)
                 conf_str.append(' '.join(layer_lst))
@@ -390,62 +411,67 @@ class Driver:
 
         baseline_conf = None
        baseline_total_time = baseline_total_energy = 0
 
-        def get_baseline_times_enegies():
+        def get_baseline_times_energies(conf):
            curr_time = curr_energy = 0
-            for layer in baseline_conf[1]:
+            for hardware, layer in conf[1]:
                for op_time, op_energy, tensor_op in layer:
                    curr_time += op_time
                    curr_energy += op_energy
            return curr_time, curr_energy
 
-        def get_final_times_energies_conf(curr_conf):
+        def get_final_times_energies_conf(curr_conf, curr_conf_name):
+            print("_____________ NEW CONFIGURATION ___________")
            final_time = final_energy = 0
-            final_conf = []
+            final_conf = [] # List (conf) of lists (layers) of tuples (operation data)
-            for layer_ind, layer in enumerate(curr_conf[1]):
+            #for hardware, layer in self.fp16_baseline:
+                #print(hardware, layer)
+            for layer_ind, (hardware, layer) in enumerate(curr_conf[1]):
                final_conf_layer = []
 
                for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
-                    baseline_time, baseline_energy, baseline_op = baseline_conf[1][layer_ind][tensor_ind]
+                    if tensor_op.find("softmax") != -1:
+                        continue
+                    # layer name, operation name, val name
+                    baseline_time = self.fp16_baseline[layer_ind][1][tensor_ind][0]
+                    baseline_energy = self.fp16_baseline[layer_ind][1][tensor_ind][1]
+                    baseline_op = self.fp16_baseline[layer_ind][1][tensor_ind][2]
+                    print(baseline_time, baseline_energy, baseline_op)
+                    print(op_time, tensor_op)
                    final_tensor_op = tensor_op
+                    #print(op_time > baseline_time)
                    if op_time > baseline_time:
+                        print("**************** BIGGER ******************")
                        final_time += baseline_time
-                        final_tensor_op = baseline_op
-                    else:
-                        final_time += op_time
-                    # Ignoring bigger energies for now
-                    '''
-                    if op_energy > baseline_energy:
-                        print("BIGGER ENERGY")
                        final_energy += baseline_energy
-                        final_tensor_op = baseline_op
+                        final_tensor_op = baseline_op
                    else:
+                        final_time += op_time
                        final_energy += op_energy
-                    '''
-                    final_energy += op_energy
                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
-                final_conf.append(final_conf_layer)
+                final_conf.append((hardware, final_conf_layer))
+                print("\n")
 
            return final_time, final_energy, (curr_conf[0], final_conf)
 
+        conf_index = 0
+        print("RESULTS")
        for line in config_file:
            if line.startswith("conf"):
                orig_line_lst = line.split(' ')
                conf_name = orig_line_lst[0]
-
+
                if not baseline_conf:
-                    baseline_conf = self.__conf_results[conf_name]
-                    print("FOUND baseline", baseline_conf)
-                    baseline_total_time, baseline_total_energy = get_baseline_times_enegies()
-                    results_file.write("%s\n" % repr(baseline_total_time)) # write baseline time to top of file
+                    baseline_conf = self.__conf_results[conf_index] #conf_name]
+                    baseline_total_time, baseline_total_energy = get_baseline_times_energies(baseline_conf)
+                    results_file.write("%s\n" % repr(baseline_total_time))
                    write_conf_to_file(conf_name, baseline_conf, 1, 1)
                else:
-                    curr_conf = self.__conf_results[conf_name]
-                    final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf)
-                    assert(final_time <= baseline_total_time)
-                    #assert(final_energy <= baseline_total_energy)
-                    write_conf_to_file(conf_name, curr_conf, final_time / baseline_total_time, final_energy / baseline_total_energy)
-
+                    curr_conf = self.__conf_results[conf_index] #conf_name]
+                    final_time, final_energy, = get_baseline_times_energies(curr_conf)
+                    #final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf, conf_name)
+                    write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy)
+                conf_index += 1
 
        results_file.close()
        config_file.close()
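
A minimal sketch, outside the patch, of the tensor-table key scheme the reworked __quantize introduces: op number "1" keeps the legacy flat keys (h2f_time / f2h_time), while other knobs produce keys like "f2h_perf2_time". The helper name quantize_keys and the sample values are illustrative only, not part of the patch.

def quantize_keys(curr_is_fp32, precision_type, op_number):
    # Mirrors the patched __quantize: "_" for the default knob,
    # "_<precision_type><op_number>_" otherwise.
    if op_number == "1":
        lookup_key = "_"
    else:
        lookup_key = "_" + precision_type + str(op_number) + "_"
    # h2f (half-to-float) when converting into fp32, f2h when into fp16
    direction = "h2f" if curr_is_fp32 else "f2h"
    return "%s%stime" % (direction, lookup_key), "%s%senergy" % (direction, lookup_key)

print(quantize_keys(True, "fp32", "1"))   # -> ('h2f_time', 'h2f_energy')
print(quantize_keys(False, "perf", "2"))  # -> ('f2h_perf2_time', 'f2h_perf2_energy')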
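Likewise, an illustrative sketch (assumed data shapes and numbers, not the patch itself) of how the artificial fp16 baseline is synthesized from the first parsed configuration: each tensor op's values are replaced with the fp16 profile for that layer/op, and layer 0 would additionally fold in the fp32->fp16 quantization cost via __quantize.

fp16_profile = {"conv1": [{"fp16_time": 1.5, "fp16_energy": 3.0}]}  # hypothetical tensor table rows
first_conf = [("gpu", [(2.0, 4.0, "conv fp32 1")])]  # (hardware, [(time, energy, op)]) per layer

fp16_baseline = []
for layer_ind, (hardware, layer) in enumerate(first_conf):
    fp16_layer = []
    for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
        row = fp16_profile["conv1"][tensor_ind]
        # swap in the fp16 numbers and rename the op accordingly
        fp16_layer.append((row["fp16_time"], row["fp16_energy"],
                           tensor_op.replace("fp32", "fp16")))
    fp16_baseline.append((hardware, fp16_layer))

print(fp16_baseline)  # [('gpu', [(1.5, 3.0, 'conv fp16 1')])]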