From 18224f2d0bd5a8738c485a77976aab5b0584dcb7 Mon Sep 17 00:00:00 2001
From: Elizabeth <hashim.sharif91@gmail.com>
Date: Sat, 16 Nov 2019 11:50:17 -0600
Subject: [PATCH] Added quantization logic to fp16 baseline

---
 .../src/driver_new_config_fp16_repl.py | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py b/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py
index 28a883d160..a7fca62564 100644
--- a/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py
+++ b/llvm/projects/soc_simulator/src/driver_new_config_fp16_repl.py
@@ -220,7 +220,7 @@ class Driver:
                 op_type = layer_as_lst[i]
                 precision_type = layer_as_lst[i + 1]
                 op_number = layer_as_lst[i + 2]
-                print(' '.join(layer_as_lst[i : i + 3]))
+                #print(' '.join(layer_as_lst[i : i + 3]))
 
                 approx_type = None
                 if line.find("fp16") != -1:
@@ -250,29 +250,37 @@ class Driver:
                 prev_layer = curr_layer
             curr_conf_results.append((layer_as_lst[1], layer_results))
 
-            # artificially generate the fp16 baseline
             if not self.__conf_results: # we're appending the baseline
-
-                # need to generate an artificial fp16 baseline
+                # need to find the fp16 baseline
                 self.fp16_baseline = []
+
+                prev_layer = Driver.PrecisionTypes.FP32
+                curr_layer = None
+
+                has_quantized = False
 
                 for layer_ind, (hardware, layer) in enumerate(curr_conf_results):
                     if len(layer) == 1 and layer[0][2].find("softmax") != -1: continue
                     fp16_layer = []
-                    print(layer_ind, hardware, layer)
+                    #print(layer_ind, hardware, layer)
                     layer_table_data = self.__tensor_layers[layer_ind]
                     layer_name = layer_table_data["Name"]
                     for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
-                        quant_time, quant_energy = 0,0
-                        if layer_ind == 0: # force quantization
-                            quant_time, quant_energy = self.__quantize("fp16", "1", Driver.PrecisionTypes.FP16, None, 0, layer_table_data)
-                            print("FP16 QUANT: ", quant_time, quant_energy)
+                        curr_layer = Driver.PrecisionTypes.FP16 # always
+
+                        quant_time, quant_energy = self.__quantize("fp16", "1", curr_layer, prev_layer, tensor_ind, layer_table_data)
+                        if quant_time != 0:
+                            assert not has_quantized
+                            has_quantized = True
                         tensor_info = self.__tensor_table[layer_name][tensor_ind]
                         fp16_time = tensor_info["fp16_time"] + quant_time
                         fp16_energy = tensor_info["fp16_energy"] + quant_energy
                         fp16_layer.append((fp16_time, fp16_energy, tensor_op.replace("fp32", "fp16")))
+                        prev_layer = curr_layer
+
+                    prev_layer = curr_layer
                     self.fp16_baseline.append((hardware, fp16_layer))
-                print(self.fp16_baseline)
+                #print(self.fp16_baseline)
             self.__conf_results.append( (first_line, curr_conf_results) )
             line = config_file.readline().strip()
         config_file.close()
@@ -306,7 +314,8 @@ class Driver:
         energy_key = "f2h%senergy" % lookup_key
         time = tensor_op_row[time_key]
         energy = tensor_op_row[energy_key]
-        print("Quantization: (%f, %f)" % (time, energy))
+        #print(time_key, energy_key)
+        #print("Quantization: (%f, %f)" % (time, energy))
 
         return (time, energy)
 
@@ -372,7 +381,7 @@ class Driver:
         elif curr_layer == Driver.PrecisionTypes.FP16:
             time_key = "fp16_time"
             energy_key = "fp16_energy"
-            print(time_key, energy_key)
+            #print(time_key, energy_key)
         conversion_time = tensor_info[time_key]
         conversion_energy = tensor_info[energy_key]
         #print("GPU: (%f, %f)\n" % (conversion_time, conversion_energy))
@@ -420,7 +429,6 @@ class Driver:
             return curr_time, curr_energy
 
         def get_final_times_energies_conf(curr_conf, curr_conf_name):
-            #print("_____________ NEW CONFIGURATION ___________")
            final_time = final_energy = 0
            final_conf = [] # List (conf) of lists (layers) of tuples (operation data)
 
@@ -444,8 +452,8 @@ class Driver:
                    if op_time > baseline_time:
                        print("**************** BIGGER ******************")
                        print(curr_conf_name)
-                        print(baseline_time, baseline_energy, baseline_op)
-                        print(op_time, tensor_op)
+                        print(baseline_time, baseline_energy, baseline_op, layer_ind)
+                        print(op_time, tensor_op, layer_ind)
                        final_time += baseline_time
                        final_energy += baseline_energy
                        final_tensor_op = baseline_op
@@ -454,7 +462,7 @@ class Driver:
                        final_energy += op_energy
                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
                final_conf.append((hardware, final_conf_layer))
-            print("\n")
+            #print("\n")
            return final_time, final_energy, (curr_conf[0], final_conf)
 
        conf_index = 0
--
GitLab
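Editor's note: the sketch below is not part of the patch and does not use the driver's actual API. It is a minimal standalone illustration of the behaviour the reworked baseline code implements: every op runs in fp16, and the one-time fp32-to-fp16 quantization cost is charged to the first op whose precision differs from its predecessor (the assert mirrors has_quantized in the patch). The names quantize_cost, build_fp16_baseline, and the placeholder costs are hypothetical stand-ins for Driver.__quantize and the tensor tables.

def quantize_cost(prev_precision, curr_precision):
    # Hypothetical stand-in for __quantize: returns (time, energy) of converting
    # inputs between precisions; zero when no precision change occurs.
    if prev_precision == curr_precision:
        return 0.0, 0.0
    return 1.5, 2.0  # placeholder f2h conversion cost

def build_fp16_baseline(layers):
    # layers: list of layers, each a list of (fp16_time, fp16_energy, op_name) tuples.
    baseline = []
    prev_precision = "fp32"      # inputs start in fp32, as in the patch
    has_quantized = False
    for layer in layers:
        fp16_layer = []
        for fp16_time, fp16_energy, op_name in layer:
            curr_precision = "fp16"  # the baseline always runs ops in fp16
            q_time, q_energy = quantize_cost(prev_precision, curr_precision)
            if q_time != 0:
                assert not has_quantized  # conversion cost is charged exactly once
                has_quantized = True
            fp16_layer.append((fp16_time + q_time, fp16_energy + q_energy, op_name))
            prev_precision = curr_precision
        baseline.append(fp16_layer)
    return baseline

if __name__ == "__main__":
    layers = [[(1.0, 2.0, "conv fp16 1")], [(0.5, 1.0, "add fp16 1")]]
    # Only the first op absorbs the quantization cost:
    # [[(2.5, 4.0, 'conv fp16 1')], [(0.5, 1.0, 'add fp16 1')]]
    print(build_fp16_baseline(layers))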