Commit 9b51ef5c authored by Ubuntu

merging

parents bb302f7c de4a1143
@@ -2,9 +2,21 @@ import glob
import os
import subprocess
import shutil
import sys
from collections import defaultdict
'''
FORMAT
** LayerName NumOpsInLayer <cols>
OpName Col1Val Col2Val ...
** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy f2h_time f2h_energy fp16_perf_time fp16_perf_energy fp16_time fp16_energy
Conv1 51.8808 97.2844 319.582 601.966 12.81 18.758 388.092 650.649 340.037 590.664
'''
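# A minimal parsing sketch of the header format above (illustrative only, not part of
# the TableGenerator API), assuming tokens are single-space separated:
#   header = "** Conv1 1 h2f_time h2f_energy fp32_time fp32_energy ..."
#   tokens = header.split(' ')[1:]
#   layer_name, num_ops, col_names = tokens[0], int(tokens[1]), tokens[2:]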
class TableGenerator:
__ops_header_delimiter = "#"
@@ -53,7 +65,7 @@ class TableGenerator:
3. Writes the internal table to <network_name>_tensors.txt file and uses the
<network_name>_ops.txt file as a guideline in terms of row order
'''
self.__run_inputted_binaries()
#self.__run_inputted_binaries()
self.__build_internal_table()
self.__output_table_to_file()
@@ -282,8 +294,11 @@ class TableGenerator:
if __name__ == "__main__":
binary_dir_path = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet"
num_iters = 1
profiler_binary_name = "/home/nvidia/awesome_profiler/pp"
if len(sys.argv) != 4:
print("python table_generator.py <binary dir path> <num itrs> <profiler bin path>")
exit(1)
binary_dir_path = sys.argv[1]
num_iters = int(sys.argv[2])
profiler_binary_name = sys.argv[3]
table_gen = TableGenerator(binary_dir_path, num_iters, profiler_binary_name)
table_gen.generate_table()
# Python driver -- ported from Perl driver (driver.pl)
from collections import defaultdict
import os
import subprocess
import sys
def build_nested_default_dict():
return defaultdict(build_nested_default_dict)
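# Minimal usage sketch of the nested defaultdict above (illustrative keys only):
#   d = build_nested_default_dict()
#   d["Conv1"]["fp16"]["Time"] = 1.0  # intermediate levels are created on demand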
class Driver:
fp16_swing = 8
tensor_layers = defaultdict(build_nested_default_dict)
class ApproxTypes:
FP16 = 0
FP32 = 1
PROMISE = 2
def is_conv(operation_name):
return operation_name.startswith("Conv")
results_time_key = "Time"
results_energy_key = "Energy"
def is_nml(operation_name):
return operation_name.startswith("NML")
def is_fc(operation_name):
return operation_name.startswith("FC")
def driver(self):
self.__parse_tensor_layer_file()
self.__parse_tensor_table()
self.__run_simulations()
self.__display_results()
def parse_tensor_layer_file(layer_filename):
'''
Convs: Layer name, N, Cin, H, W, Cout, Kh, Kw, Sh, Sw
FCs: Layer name, Rows_A, Cols_A, Rows_B, Cols_B
NMLs (No Man's Lands): NML<number>
'''
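# Illustrative layer-file lines matching the formats listed above (values are made up):
#   Conv1,1,3,32,32,64,3,3,1,1
#   FC1,1,1024,1024,10
#   NML1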
if not os.path.isfile(layer_filename):
print("ERROR: %s was not found." % layer_filename)
exit(1)
layer_file = open(layer_filename, "r")
for line in layer_file:
layer_data = line.strip().split(',')
layer_name = layer_data[0]
if is_conv(layer_name):
tensor_layers[layer_name]["N"] = layer_data[1]
tensor_layers[layer_name]["Cin"] = layer_data[2]
tensor_layers[layer_name]["H"] = layer_data[3]
tensor_layers[layer_name]["W"] = layer_data[4]
tensor_layers[layer_name]["Cout"] = layer_data[5]
tensor_layers[layer_name]["Kh"] = layer_data[6]
tensor_layers[layer_name]["Kw"] = layer_data[7]
tensor_layers[layer_name]["Sh"] = layer_data[8]
tensor_layers[layer_name]["Sw"] = layer_data[9]
elif is_fc(layer_name):
tensor_layers[layer_name]["RA"] = layer_data[1]
tensor_layers[layer_name]["CA"] = layer_data[2]
tensor_layers[layer_name]["RB"] = layer_data[3]
tensor_layers[layer_name]["CB"] = layer_data[4]
elif not is_nml(layer_name): # TODO should we store data for NMLs?
print("ERROR: Invalid layer name %s" % layer_name)
exit(1)
layer_file.close()
# should this be a nested dict of dicts?
# [layer_name][operation_name][cols]
tensor_table = defaultdict(build_nested_default_dict)
def parse_tensor_table(table_filename):
if not os.path.isfile(table_filename):
print("ERROR: %s was not found." % table_filename)
exit(1)
def __init__(self, layer_filename, table_filename, config_filename, results_filename):
self.__layer_filename = layer_filename
self.__table_filename = table_filename
self.__config_filename = config_filename
self.__results_filename = results_filename
# NOTE: Use an OrderedDict if we want to search by operation name
# Using a list because we care about the order the data is read in
# since it corresponds to the data in the configuration file
self.__tensor_layers = []
# [layer_name][operation_name][cols]
# Operation names need to be stored in order of insertion
self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
# [Time/Energy][number corresponding to order the layer config was read in] = time/energy
self.__aggregate_results = defaultdict(lambda: defaultdict(float))
self.__config_count = 0
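# Illustrative shape of self.__tensor_table after parsing, using the example row from
# the FORMAT docstring in table_generator.py (trailing columns elided):
#   self.__tensor_table["Conv1"] == [
#       {"Name": "Conv1", "h2f_time": 51.8808, "h2f_energy": 97.2844, ...}
#   ]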
@staticmethod
def is_conv(operation_name):
return operation_name.startswith("Conv")
@staticmethod
def is_nml(operation_name):
return operation_name.startswith("NML")
@staticmethod
def is_fc(operation_name):
return operation_name.startswith("FC")
def __parse_tensor_layer_file(self):
if not os.path.isfile(self.__layer_filename):
print("ERROR: %s was not found." % self.__layer_filename)
exit(1)
layer_file = open(self.__layer_filename, "r")
for line in layer_file:
layer_data = line.strip().split(',')
layer_name = layer_data[0]
tensor_layer = defaultdict(str)
tensor_layer["Name"] = layer_name
if Driver.is_conv(layer_name):
tensor_layer["N"] = float(layer_data[1])
tensor_layer["Cin"] = float(layer_data[2])
tensor_layer["H"] = float(layer_data[3])
tensor_layer["W"] = float(layer_data[4])
tensor_layer["Cout"] = float(layer_data[5])
tensor_layer["Kh"] = float(layer_data[7])
tensor_layer["Kw"] = float(layer_data[8])
tensor_layer["Sh"] = float(layer_data[9])
tensor_layer["Sw"] = float(layer_data[10])
elif Driver.is_fc(layer_name):
tensor_layer["RA"] = float(layer_data[1])
tensor_layer["CA"] = float(layer_data[2])
tensor_layer["RB"] = float(layer_data[3])
tensor_layer["CB"] = float(layer_data[4])
elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs?
print("ERROR: Invalid layer name %s" % layer_name)
exit(1)
self.__tensor_layers.append(tensor_layer)
layer_file.close()
def __parse_tensor_table(self):
if not os.path.isfile(self.__table_filename):
print("ERROR: %s was not found." % self.__table_filename)
exit(1)
table_file = open(self.__table_filename, "r")
line = table_file.readline().strip()
while line:
# Line here MUST be a header or there's a bug
# Get the description of the layer
assert(line.startswith("**"))
header_contents = line.split(' ')[1:]
layer_name = header_contents[0]
num_ops = int(header_contents[1])
col_names = header_contents[2:]
layer_operations = []
# Go through all operations in the layer
for op_count in range(num_ops):
operation_data = defaultdict(str)
line = table_file.readline().strip()
op_data = line.split(' ')
op_name = op_data[0]
operation_data["Name"] = op_name
# Number of data items (#s) needs to match up with the # of cols
assert(len(op_data) - 1 == len(col_names))
# Go through all data items (each col element) per operation
for i in range(len(col_names)):
operation_data[col_names[i]] = float(op_data[i + 1])
layer_operations.append(operation_data)
self.__tensor_table[layer_name] = layer_operations
line = table_file.readline().strip()
table_file.close()
@staticmethod
def is_promise(config_layer):
return float(config_layer.split(' ')[0]) < Driver.fp16_swing
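# Illustrative reading of a config entry against fp16_swing == 8 (made-up knob values):
#   "7"     -> first knob 7 < 8, so the whole layer runs on PROMISE at swing level 7
#   "8 9 9" -> first knob 8 >= 8, so the layer runs on the GPU, one knob per tensor op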
def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \
or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed
return 0.0, 0.0
layer_name = layer_data["Name"]
# NOTE: Ignoring logic where curr == promise or prev == promise because
# smartDMA is always true so we'd return near the beginning of the method
# Get h2f/f2h data using the first tensor operation in the layer
# (which is why order matters in the tensor table)
tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]
if curr_layer == Driver.ApproxTypes.FP32:
time = tensor_op_row["h2f_time"]
energy = tensor_op_row["h2f_energy"]
elif curr_layer == Driver.ApproxTypes.FP16:
time = tensor_op_row["f2h_time"]
energy = tensor_op_row["f2h_energy"]
print("Quantization: (%f, %f)" % (time, energy))
return (time, energy)
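# Illustrative case for the __quantize lookup above: if the previous op ran in FP16 and
# the current op runs in FP32, the h2f_time/h2f_energy of the tensor-table row at
# h2f_f2h_operation_ind is charged once; no cost is added when the precision is
# unchanged or when PROMISE is involved.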
def __run_promise_simulation(self, swing, layer_data):
layer_name = layer_data["Name"]
patch_factor = 1
if Driver.is_conv(layer_name):
rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
/ (layer_data["Sh"] * layer_data["Sw"])
cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
rows_b = cols_a
cols_b = layer_data["Cout"]
patch_factor = layer_data["Kh"] * layer_data["Kw"]
elif Driver.is_fc(layer_name):
rows_a = layer_data["RA"]
cols_a = layer_data["CA"]
rows_b = cols_a
cols_b = layer_data["CB"]
else:
print("PROMISE can't run whatever this layer is.")
exit(1)
# Run promise simulator
# TODO need to print time and energy in the ptm runner so we can pipe it
output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \
str(cols_b), str(patch_factor), str(swing)], \
stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
total_time_energy = output.strip().split(',')
assert(len(total_time_energy) == 2)
print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
return float(total_time_energy[0]), float(total_time_energy[1])
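# Worked example of the GEMM dimensions computed above for a hypothetical Conv layer
# (N=1, Cin=3, H=32, W=32, Cout=64, Kh=Kw=3, Sh=Sw=1):
#   rows_a = 1*32*32/(1*1) = 1024, cols_a = 3*3*3 = 27,
#   rows_b = 27, cols_b = 64, patch_factor = 3*3 = 9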
def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind):
tensor_info = self.__tensor_table[layer_name][tensor_ind]
if curr_layer == Driver.ApproxTypes.FP32:
conversion_time = tensor_info["fp32_time"]
conversion_energy = tensor_info["fp32_energy"]
else:
conversion_time = tensor_info["fp16_time"]
conversion_energy = tensor_info["fp16_energy"]
print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
return (conversion_time, conversion_energy)
def __run_simulations(self):
print("run sim")
if not os.path.isfile(self.__config_filename):
print("ERROR: %s was not found" % self.__config_filename)
exit(1)
config_file = open(self.__config_filename, "r")
# each line = independent configuration
# layers are separated by commas
# tensor ops are separated by spaces
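# Illustrative config line (made-up knob values, fp16_swing == 8):
#   "7,8 8,9 9 9" -> layer 1 on PROMISE at swing 7; layer 2 on the GPU with two FP16 ops;
#                    layer 3 on the GPU with three FP32 ops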
for config in config_file:
config_layers = config.strip().split(',')
prev_layer = Driver.ApproxTypes.FP32
curr_layer = None
for layer_ind, config_layer in enumerate(config_layers): # level
layer_data = self.__tensor_layers[layer_ind] # layer
layer_name = layer_data["Name"]
if Driver.is_promise(config_layer):
print("Running layer %s on PROMISE" % layer_name)
curr_layer = Driver.ApproxTypes.PROMISE
quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data)
# Compute
time, energy = self.__run_promise_simulation(config_layer, layer_data)
print(time, energy)
self.__aggregate_results[Driver.results_time_key][self.__config_count] += time
self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy
else:
print("Running layer %s on the GPU" % layer_name)
tensor_ops = config_layer.split(' ')
total_time = 0
total_energy = 0
for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevel
tensor_op = int(tensor_op)
if tensor_op == Driver.fp16_swing:
curr_layer = Driver.ApproxTypes.FP16
else:
curr_layer = Driver.ApproxTypes.FP32
quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data)
conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind)
total_time += quant_time + conv_time
total_energy += quant_energy + conv_energy
self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time
self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy
prev_layer = curr_layer
self.__config_count += 1
print("\n")
config_file.close()
def __display_results(self):
results_file = open(self.__results_filename, "w")
attributes_to_print = [Driver.results_time_key, Driver.results_energy_key]
for attribute in attributes_to_print:
results_file.write("%s\n" % attribute)
results_file.write("Configuration,Total,Improvement\n")
baseline_val = self.__aggregate_results[attribute][0]
print(baseline_val)
best_config = None
best_result = None
for config_ind in range(self.__config_count):
results_file.write("c%d" % config_ind)
time_or_energy_val = self.__aggregate_results[attribute][config_ind]
# Using repr to keep all decimal digits when writing to file
results_file.write(",%s" % repr(time_or_energy_val))
results_file.write(",%s\n" % repr(baseline_val / (time_or_energy_val + 0.0001)))
if not best_result or time_or_energy_val < best_result:
best_result = time_or_energy_val
best_config = config_ind
results_file.write("\nc%d,%s\n\n" % (best_config, repr(self.__aggregate_results[attribute][best_config])))
results_file.close()
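# Illustrative results-file layout produced above (made-up numbers, digits truncated):
#   Time
#   Configuration,Total,Improvement
#   c0,12.5,0.99999
#   c1,10.0,1.24998
#
#   c1,10.0
#
#   Energy
#   ...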
table_file = open(table_filename, "r")
line = table_file.readline().strip()
while line:
# Line here MUST be a header or there's a bug
# Get the description of the layer
assert(line.startswith("**"))
header_contents = line.split(' ')[1:]
layer_name = header_contents[0]
num_ops = int(header_contents[1])
col_names = header_contents[2:]
# Go through all operations in the layer
for op_count in range(num_ops):
line = table_file.readline().strip()
op_data = line.split(' ')
op_name = op_data[0]
# Number of data items (#s) needs to match up with the # of cols
assert(len(op_data) - 1 == len(col_names))
# Go through all data items (each col element) per operation
for i in range(len(col_names)):
tensor_table[layer_name][op_name][col_names[i]] = op_data[i + 1]
line = table_file.readline().strip()
table_file.close()
def run_simulations():
# open configuration file
# open results file
# read through each line in the configuration file
# for each config file line --> parse the comma separated voltage swing levels
# recall: each line = a configuration that works
# for each level
# if promise --> promise runs an entire layer
# quantize, no patching and unpatching
# run on promise
# output the total time and energy
# else
# for each sublevel (separated by spaces)
# quantize
# run
# keep track of total time and energy --> update as needed
# output the total time and energy
# quantization: we always have smart dma
# need to search stuff up
# $layer = a map of elements
# stores the layer name, then
if __name__ == "__main__":
if len(sys.argv) != 4:
if len(sys.argv) != 5:
print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
exit(1)
Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()