diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py index 6f224c6c62aaed338afe9732c39484f98957a6a2..85f17d687d645f99c6c95bf33a87b63004635b29 100644 --- a/llvm/projects/soc_simulator/src/driver.py +++ b/llvm/projects/soc_simulator/src/driver.py @@ -38,10 +38,10 @@ def parse_tensor_layer_file(layer_filename): tensor_layer["H"] = float(layer_data[3]) tensor_layer["W"] = float(layer_data[4]) tensor_layer["Cout"] = float(layer_data[5]) - tensor_layer["Kh"] = float(layer_data[6]) - tensor_layer["Kw"] = float(layer_data[7]) - tensor_layer["Sh"] = float(layer_data[8]) - tensor_layer["Sw"] = float(layer_data[9]) + tensor_layer["Kh"] = float(layer_data[7]) + tensor_layer["Kw"] = float(layer_data[8]) + tensor_layer["Sh"] = float(layer_data[9]) + tensor_layer["Sw"] = float(layer_data[10]) elif is_fc(layer_name): tensor_layer["RA"] = float(layer_data[1]) @@ -102,34 +102,31 @@ def parse_tensor_table(table_filename): table_file.close() fp16_swing = 8 -iterations = 10 class ApproxTypes: FP16 = 0 FP32 = 1 PROMISE = 2 +def get_approx_type(approx_type): + if approx_type == 0: + return "fp16" + elif approx_type == 1: + return "fp32" + return "promise" + def is_promise(config_layer): # TODO overhead in call to split? return float(config_layer.split(' ')[0]) < fp16_swing -# NOTE smart_dma is always true + def quantize(curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + print(get_approx_type(curr_layer), get_approx_type(prev_layer)) if curr_layer == prev_layer or curr_layer == ApproxTypes.PROMISE \ or prev_layer == ApproxTypes.PROMISE: # No quantization needed return 0.0, 0.0 layer_name = layer_data["Name"] - ''' - if is_conv(layer_name): - size = layer_data["N"] * layer_data["Cin"] * layer_data["H"] * layer_data["W"] \ - + layer_data["Cout"] * layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] - elif is_fc(layer_name): - size = layer_data["RA"] * layer_data["CA"] + layer_data["RB"] * layer_data["CB"] - elif not is_nml(layer_name): - print("ERROR: Invalid layer name %s" % layer_name) - exit(1) - ''' # NOTE: Ignoring logic where curr == promise or prev == promise bc # smartDMA is always true so we'd return near the beginning of the method @@ -138,17 +135,23 @@ def quantize(curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): # (which is why order matters in the tensor table) tensor_op_row = tensor_table[layer_name][h2f_f2h_operation_ind] if curr_layer == ApproxTypes.FP32: - return tensor_op_row["h2f_time"], tensor_op_row["h2f_energy"] + time = tensor_op_row["h2f_time"] + energy = tensor_op_row["h2f_energy"] elif curr_layer == ApproxTypes.FP16: - return tensor_op_row["f2h_time"], tensor_op_row["f2h_energy"] - assert(False) # Error: Should never reach this section + time = tensor_op_row["f2h_time"] + energy = tensor_op_row["f2h_energy"] + print("Quantization: (%f, %f)" % (time, energy)) + return (time, energy) def run_promise_simulation(swing, layer_data): layer_name = layer_data["Name"] patch_factor = 1 if is_conv(layer_name): + print("is conv") + print(layer_data["N"], layer_data["H"], layer_data["W"], layer_data["Sh"], layer_data["Sw"], layer_data["Cin"], layer_data["Kh"], layer_data["Kw"], layer_data["Cout"]) + rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ / (layer_data["Sh"] * layer_data["Sw"]) cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] @@ -156,6 +159,7 @@ def run_promise_simulation(swing, layer_data): cols_b = layer_data["Cout"] patch_factor = layer_data["Kh"] * layer_data["Kw"] elif is_fc(layer_name): + print("is fc") rows_a = layer_data["RA"] cols_a = layer_data["CA"] rows_b = cols_a @@ -163,7 +167,7 @@ def run_promise_simulation(swing, layer_data): else: print("PROMISE can't run whatever this layer is.") exit(1) - + #print("[%f x %f] x [%f x %f] : %f" % (rows_a, cols_a, rows_b, cols_b, swing)) # Run promise simulator # TODO need to print time and energy in the ptm runner so we can pipe it output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ @@ -172,11 +176,11 @@ def run_promise_simulation(swing, layer_data): total_time_energy = output.strip().split(',') assert(len(total_time_energy) == 2) + print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1])) return float(total_time_energy[0]), float(total_time_energy[1]) def run_gpu_simulation(curr_layer, layer_name, tensor_ind): - print(layer_name, tensor_ind) tensor_info = tensor_table[layer_name][tensor_ind] if curr_layer == ApproxTypes.FP32: conversion_time = tensor_info["fp32_time"] @@ -219,11 +223,9 @@ def run_simulations(config_filename): layer_name = layer_data["Name"] if is_promise(config_layer): - print("IS PROMISE") print("Running layer %s on PROMISE" % layer_name) curr_layer = ApproxTypes.PROMISE quant_time, quant_energy = quantize(curr_layer, prev_layer, 0, layer_data) - # Compute time, energy = run_promise_simulation(config_layer, layer_data) aggregate_results[results_time_key][config_count] += time @@ -235,23 +237,24 @@ def run_simulations(config_filename): total_time = 0 total_energy = 0 - print(tensor_ops) for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevle + tensor_op = int(tensor_op) if tensor_op == fp16_swing: curr_layer = ApproxTypes.FP16 else: curr_layer = ApproxTypes.FP32 quant_time, quant_energy = quantize(curr_layer, prev_layer, tensor_ind, \ layer_data) + conv_time, conv_energy = run_gpu_simulation(curr_layer, layer_name, tensor_ind) total_time += conv_time total_time += conv_energy aggregate_results[results_time_key][config_count] += total_time aggregate_results[results_energy_key][config_count] += total_energy - print("DONE WITH LAYER") prev_layer = curr_layer config_count += 1 + print("\n") config_count += 1 # because we're storing the count and not the index config_file.close() @@ -295,4 +298,4 @@ if __name__ == "__main__": parse_tensor_layer_file("/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_layers.txt") parse_tensor_table("/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet_results/mobilenet_tensors.txt") run_simulations("/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_conf2.txt") - display_results("blah.txt") + #display_results("blah.txt")