Commit 6bed5a20 authored by kotsifa2

Merge branch 'approx_hpvm' of gitlab.engr.illinois.edu:llvm/hpvm into approx_hpvm

parents 7f496c1b 4f654e96
@@ -34,7 +34,8 @@ class Driver:
        # Operation names need to be stored in order of insertion
        self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
        self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
        self.__conf_results = [] # indexed
        #self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
    @staticmethod
@@ -77,7 +78,6 @@ class Driver:
        if not os.path.isfile(self.__layer_filename):
            print("ERROR: %s was not found." % self.__layer_filename)
            exit(1)
        layer_file = open(self.__layer_filename, "r")
        for line in layer_file:
            layer_data = line.strip().split(',')
@@ -139,14 +139,14 @@ class Driver:
                operation_data["Name"] = op_name
                # Number of data items (#s) needs to match up with the # of cols
                #print(len(op_data) - 1, len(col_names))
                #print(op_data)
                #print(col_names)
                print(len(op_data) - 1, len(col_names))
                print(op_data)
                print(col_names)
                #assert(len(op_data) - 1 == len(col_names))
                assert(len(op_data) - 1 == len(col_names))
                # Go through all data items (each col element) per operation
                for i in range(len(col_names)):
                    #print(col_names[i], float(op_data[i + 1]))
                    print(col_names[i], float(op_data[i + 1]))
                    operation_data[col_names[i]] = float(op_data[i + 1])
                layer_operations.append(operation_data)
@@ -214,8 +214,6 @@ class Driver:
                elif Driver.is_gpu(layer_as_lst[1]):
                    print("Running layer %s on the GPU" % layer_name)
                    total_time = 0
                    total_energy = 0
                    tensor_count = 0
                    # 3 elements per tensor operation
@@ -245,8 +243,6 @@ class Driver:
                        conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \
                                tensor_count, approx_type, op_number)
                        layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3])))
                        total_time += quant_time + conv_time
                        total_energy += quant_energy + conv_energy
                        prev_layer = curr_layer
                        tensor_count += 1
@@ -254,7 +250,8 @@ class Driver:
                prev_layer = curr_layer
                curr_conf_results.append(layer_results)
            self.__conf_results[conf_name] = (first_line, curr_conf_results)
            #self.__conf_results[conf_name] = (first_line, curr_conf_results)
            self.__conf_results.append( (first_line, curr_conf_results) )
            line = config_file.readline().strip()
        config_file.close()
        #print("AGGREGATE RESULTS", self.__aggregate_results)
@@ -351,10 +348,10 @@ class Driver:
            elif curr_layer == Driver.PrecisionTypes.FP16:
                time_key = "fp16_time"
                energy_key = "fp16_energy"
        #print(time_key, energy_key)
        print(time_key, energy_key)
        conversion_time = tensor_info[time_key]
        conversion_energy = tensor_info[energy_key]
        #print(conversion_time, conversion_energy)
        print(conversion_time, conversion_energy)
        print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
        return conversion_time, conversion_energy
@@ -401,7 +398,7 @@ class Driver:
        def get_final_times_energies_conf(curr_conf):
            final_time = final_energy = 0
            final_conf = []
            final_conf = [] # List (conf) of lists (layers) of tuples (operation data)
            for layer_ind, layer in enumerate(curr_conf[1]):
                final_conf_layer = []
@@ -409,39 +406,48 @@ class Driver:
                for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
                    baseline_time, baseline_energy, baseline_op = baseline_conf[1][layer_ind][tensor_ind]
                    final_tensor_op = tensor_op
                    if op_time > baseline_time:
                        final_time += baseline_time
                        final_tensor_op = baseline_op
                        final_energy += baseline_energy
                        final_tensor_op = baseline_op
                    else:
                        final_time += op_time
                        final_time += op_time
                        final_energy += op_energy
                    '''
                    # Ignoring bigger energies for now
                    if op_energy > baseline_energy:
                        final_time += baseline_energy
                        final_energy += baseline_energy
                        final_tensor_op = baseline_op
                        final_tensor_op = baseline_op
                    else:
                        final_time += op_time
                        final_energy += op_energy
                    '''
                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
                final_conf.append(final_conf_layer)
            return final_time, final_energy, (curr_conf[0], final_conf)
        conf_index = 0
        for line in config_file:
            if line.startswith("conf"):
                orig_line_lst = line.split(' ')
                conf_name = orig_line_lst[0]
                if not baseline_conf:
                    baseline_conf = self.__conf_results[conf_name]
                    baseline_conf = self.__conf_results[conf_index] #conf_name]
                    print("FOUND baseline", baseline_conf)
                    baseline_total_time, baseline_total_energy = get_baseline_times_enegies()
                    results_file.write("%s\n" % repr(baseline_total_time)) # write baseline time to top of file
                    results_file.write("%s %s\n" % (repr(baseline_total_time), repr(baseline_total_energy))) # write baseline time to top of file
                    write_conf_to_file(conf_name, baseline_conf, 1, 1)
                else:
                    curr_conf = self.__conf_results[conf_name]
                    curr_conf = self.__conf_results[conf_index] #conf_name]
                    final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf)
                    assert(final_time <= baseline_total_time)
                    assert(final_energy <= baseline_total_energy)
                    write_conf_to_file(conf_name, curr_conf, final_time / baseline_total_time, final_energy / baseline_total_energy)
                    #assert(final_energy <= baseline_total_energy)
                    #write_conf_to_file(conf_name, curr_conf, final_time, final_energy)
                    write_conf_to_file(conf_name, curr_conf, baseline_total_time / final_time, baseline_total_energy / final_energy)
                conf_index += 1
        results_file.close()
        config_file.close()
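A quick arithmetic check of the speedup change in this hunk (numbers are illustrative, not taken from any real run): the header now records baseline/final, so values above 1.0 mean the configuration beats the baseline, whereas the old final/baseline ratio shrank below 1.0 as configurations got faster.

    baseline_total_time, final_time = 10.0, 8.0   # hypothetical values
    print(final_time / baseline_total_time)       # 0.8  -> what the old code wrote
    print(baseline_total_time / final_time)       # 1.25 -> what the new code writes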
......
from collections import defaultdict
import os
import subprocess
import sys

class Driver:
    fp16_swing = 8

    class PrecisionTypes:
        FP16 = 0
        FP32 = 1
        PROMISE = 2

    class ApproxTypes:
        PERF = 3
        SAMP = 4

    results_time_key = "Time"
    results_energy_key = "Energy"

    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
        self.__layer_filename = layer_filename
        self.__table_filename = table_filename
        self.__config_filename = config_filename
        self.__results_filename = results_filename
        # NOTE: Use an OrderedDict if we want to search by operation name
        # Using a list bc we care about the order the data is read in
        # since it corresponds to the data in the configuration file
        self.__tensor_layers = []
        # [layer_name][operation_name][cols]
        # Operation names need to be stored in order of insertion
        self.__tensor_table = defaultdict(lambda: list(defaultdict(str)))
        self.__conf_results = {} # {conf name: (first line, [[layer value if promise], [tensor vals if gpu]])}
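        # Illustrative shape of __conf_results (hypothetical names and numbers, inferred
        # from how __run_simulations fills it in below, not from a real run):
        #   self.__conf_results["conf2"] = (
        #       "conf2 1.5 1.5 84.2 84.2",                                   # header line of the conf block
        #       [[(0.4, 1.2, "promise swing 7")],                            # PROMISE layer: one (time, energy, op string) tuple
        #        [(0.1, 0.3, "conv fp16 12"), (0.05, 0.1, "add fp16 12")]])  # GPU layer: one tuple per tensor op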
    @staticmethod
    def is_conv(operation_name):
        return operation_name.startswith("Conv")

    @staticmethod
    def is_nml(operation_name):
        return operation_name.startswith("NML")

    @staticmethod
    def is_fc(operation_name):
        return operation_name.startswith("FC")

    # FOR DEBUGGING ONLY
    def __get_str(self, appr):
        if appr == Driver.PrecisionTypes.FP16:
            return "FP16"
        elif appr == Driver.PrecisionTypes.FP32:
            return "FP32"
        elif appr == Driver.PrecisionTypes.PROMISE:
            return "PROMISE"
        elif appr == Driver.ApproxTypes.PERF:
            return "PERF"
        elif appr == Driver.ApproxTypes.SAMP:
            return "SAMP"

    def driver(self):
        self.__parse_tensor_layer_file()
        self.__parse_tensor_table()
        self.__run_simulations()
        self.__write_output()
    def __parse_tensor_layer_file(self):
        if not os.path.isfile(self.__layer_filename):
            print("ERROR: %s was not found." % self.__layer_filename)
            exit(1)
        layer_file = open(self.__layer_filename, "r")
        for line in layer_file:
            layer_data = line.strip().split(',')
            layer_name = layer_data[0]
            tensor_layer = defaultdict(str)
            tensor_layer["Name"] = layer_name
            if Driver.is_conv(layer_name):
                tensor_layer["N"] = float(layer_data[1])
                tensor_layer["Cin"] = float(layer_data[2])
                tensor_layer["H"] = float(layer_data[3])
                tensor_layer["W"] = float(layer_data[4])
                tensor_layer["Cout"] = float(layer_data[5])
                tensor_layer["Kh"] = float(layer_data[7])
                tensor_layer["Kw"] = float(layer_data[8])
                tensor_layer["Sh"] = float(layer_data[9])
                tensor_layer["Sw"] = float(layer_data[10])
            elif Driver.is_fc(layer_name):
                tensor_layer["RA"] = float(layer_data[1])
                tensor_layer["CA"] = float(layer_data[2])
                tensor_layer["RB"] = float(layer_data[3])
                tensor_layer["CB"] = float(layer_data[4])
            elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs?
                print("ERROR: Invalid layer name %s" % layer_name)
                exit(1)
            self.__tensor_layers.append(tensor_layer)
        layer_file.close()
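    # Hypothetical layer-info lines for each branch above, inferred from the indices used
    # in __parse_tensor_layer_file (the values and the meaning of the skipped layer_data[6]
    # field in Conv rows are assumptions, not taken from a real input file):
    #   Conv1,128,3,32,32,64,?,3,3,1,1   -> N, Cin, H, W, Cout, (unused), Kh, Kw, Sh, Sw
    #   FC1,128,256,256,10               -> RA, CA, RB, CB
    #   NML1                             -> NML layers store no numeric data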
    def __parse_tensor_table(self):
        if not os.path.isfile(self.__table_filename):
            print("ERROR: %s was not found." % self.__table_filename)
            exit(1)
        table_file = open(self.__table_filename, "r")
        line = table_file.readline().strip()
        while line:
            # Line here MUST be a header or there's a bug
            # Get the description of the layer
            assert(line.startswith("**"))
            header_contents = line.split(' ')[1:]
            layer_name = header_contents[0]
            num_ops = int(header_contents[1])
            col_names = header_contents[2:]
            layer_operations = []
            # Go through all operations in the layer
            for op_count in range(num_ops):
                operation_data = defaultdict(str)
                line = table_file.readline().strip()
                op_data = line.split(' ')
                op_name = op_data[0]
                operation_data["Name"] = op_name
                # Number of data items (#s) needs to match up with the # of cols
                #print(len(op_data) - 1, len(col_names))
                #print(op_data)
                #print(col_names)
                #assert(len(op_data) - 1 == len(col_names))
                # Go through all data items (each col element) per operation
                for i in range(len(col_names)):
                    #print(col_names[i], float(op_data[i + 1]))
                    operation_data[col_names[i]] = float(op_data[i + 1])
                layer_operations.append(operation_data)
            self.__tensor_table[layer_name] = layer_operations
            line = table_file.readline().strip()
        table_file.close()
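    # Illustrative tensor-table fragment (layer/operation names and numbers are made up;
    # only the layout is inferred from the parsing above): a "**" header gives the layer
    # name, the operation count, and the column names, followed by one row per operation.
    #   ** Conv1 2 fp16_time fp16_energy fp32_time fp32_energy f2h_time f2h_energy
    #   Conv1_tensor0 1.1 2.2 3.3 4.4 0.1 0.2
    #   Conv1_tensor1 1.0 2.0 3.0 4.0 0.1 0.2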
    @staticmethod
    def is_promise(layer_hardware):
        return layer_hardware == "promise"

    @staticmethod
    def is_gpu(layer_hardware):
        return layer_hardware == "gpu"

    def __run_simulations(self):
        config_file = open(self.__config_filename, "r")
        line = config_file.readline().strip()
        while line:
            assert(line == "+++++")
            print("CONFIGURATION")
            curr_conf_results = []
            prev_layer = Driver.PrecisionTypes.FP32
            curr_layer = None
            line = config_file.readline().strip()
            first_line = line
            conf_name = line.split(' ')[0]
            assert(conf_name.startswith("conf"))
            line = config_file.readline().strip()
            while line != "-----":
                # Skip softmax
                if line.find("softmax") != -1:
                    line = config_file.readline().strip()
                    continue
                layer_as_lst = line.split(' ')
                layer_ind = int(layer_as_lst[0]) - 1
                layer_table_data = self.__tensor_layers[layer_ind]
                layer_name = layer_table_data["Name"]
                layer_results = []
                if Driver.is_promise(layer_as_lst[1]):
                    print("Running layer %s on PROMISE" % layer_name)
                    curr_layer = Driver.PrecisionTypes.PROMISE
                    total_time = 0
                    total_energy = 0
                    # To support multiple sets of <param> <number> in the future
                    for i in range(2, len(layer_as_lst), 2):
                        param_name = layer_as_lst[i] # Use when there's more than 1 type of param
                        param_val = int(layer_as_lst[i + 1])
                        time, energy = self.__run_promise_simulation(param_val, layer_table_data)
                        total_time += time
                        total_energy += energy
                    layer_results.append((total_time, total_energy, ' '.join(layer_as_lst[1:])))
                elif Driver.is_gpu(layer_as_lst[1]):
                    print("Running layer %s on the GPU" % layer_name)
                    total_time = 0
                    total_energy = 0
                    tensor_count = 0
                    # 3 elements per tensor operation
                    for i in range(2, len(layer_as_lst), 3):
                        op_type = layer_as_lst[i]
                        precision_type = layer_as_lst[i + 1]
                        op_number = layer_as_lst[i + 2]
                        approx_type = None
                        if precision_type == "fp16" or line.find("fp16") != -1:
                            curr_layer = Driver.PrecisionTypes.FP16
                        elif precision_type == "fp32" or line.find("fp32") != -1:
                            curr_layer = Driver.PrecisionTypes.FP32
                        elif precision_type == "perf" or precision_type == "samp": # Handle approx type
                            if precision_type == "perf":
                                approx_type = Driver.ApproxTypes.PERF
                            elif precision_type == "samp":
                                approx_type = Driver.ApproxTypes.SAMP
                            if line.find("fp16") != -1:
                                curr_layer = Driver.PrecisionTypes.FP16
                            elif line.find("fp32") != -1:
                                curr_layer = Driver.PrecisionTypes.FP32
                        quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, \
                                tensor_count, layer_table_data)
                        conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, \
                                tensor_count, approx_type, op_number)
                        layer_results.append((quant_time + conv_time, quant_energy + conv_energy, ' '.join(layer_as_lst[i : i + 3])))
                        total_time += quant_time + conv_time
                        total_energy += quant_energy + conv_energy
                        prev_layer = curr_layer
                        tensor_count += 1
                line = config_file.readline().strip()
                prev_layer = curr_layer
                curr_conf_results.append(layer_results)
            self.__conf_results[conf_name] = (first_line, curr_conf_results)
            line = config_file.readline().strip()
        config_file.close()
        #print("AGGREGATE RESULTS", self.__aggregate_results)
    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
        if curr_layer == prev_layer or curr_layer == Driver.PrecisionTypes.PROMISE \
                or prev_layer == Driver.PrecisionTypes.PROMISE:
            return 0.0, 0.0
        layer_name = layer_data["Name"]
        # NOTE: Ignoring logic where curr == promise or prev == promise bc
        # smartDMA is always true so we'd return near the beginning of the method
        # Get h2f/f2h data using the first tensor operation in the layer
        # (which is why order matters in the tensor table)
        tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]
        time_key = None
        energy_key = None
        if curr_layer == Driver.PrecisionTypes.FP32:
            time_key = "h2f_time"
            energy_key = "h2f_energy"
        elif curr_layer == Driver.PrecisionTypes.FP16:
            time_key = "f2h_time"
            energy_key = "f2h_energy"
        time = tensor_op_row[time_key]
        energy = tensor_op_row[energy_key]
        print(time_key, energy_key, time, energy)
        print("Quantization: (%f, %f)" % (time, energy))
        return (time, energy)
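    # For example: with prev_layer == FP32 and curr_layer == FP16, the cost charged is the
    # "f2h_time"/"f2h_energy" entry of the tensor operation at index h2f_f2h_operation_ind;
    # if the precisions match, or either side is PROMISE, the conversion is free (0.0, 0.0).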
    def __run_promise_simulation(self, swing, layer_data):
        layer_name = layer_data["Name"]
        patch_factor = 1
        if Driver.is_conv(layer_name):
            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
                    / (layer_data["Sh"] * layer_data["Sw"])
            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
            rows_b = cols_a
            cols_b = layer_data["Cout"]
            patch_factor = layer_data["Kh"] * layer_data["Kw"]
        elif Driver.is_fc(layer_name):
            rows_a = layer_data["RA"]
            cols_a = layer_data["CA"]
            rows_b = cols_a
            cols_b = layer_data["CB"]
        else:
            print("PROMISE can't run whatever this layer is.")
            exit(1)
        # Run promise simulator
        # TODO need to print time and energy in the ptm runner so we can pipe it
        output = subprocess.Popen(["./ptm_new", str(rows_a), str(cols_a), str(rows_b), \
                str(cols_b), str(patch_factor), str(swing)], \
                stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
        total_time_energy = output.strip().split(',')
        assert(len(total_time_energy) == 2)
        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
        return float(total_time_energy[0]), float(total_time_energy[1])
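    # Worked example of the GEMM shape handed to ./ptm_new (layer sizes are hypothetical):
    # a Conv layer with N=128, H=W=32, Sh=Sw=1, Cin=3, Kh=Kw=3, Cout=64 gives
    #   rows_a = 128 * 32 * 32 / (1 * 1) = 131072.0
    #   cols_a = 3 * 3 * 3               = 27.0
    #   rows_b = cols_a                  = 27.0
    #   cols_b                           = 64.0
    #   patch_factor = 3 * 3             = 9.0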
    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, \
            approx_type = None, knob_number = None):
        tensor_info = self.__tensor_table[layer_name][tensor_ind]
        time_key = None
        energy_key = None
        if approx_type == Driver.ApproxTypes.PERF or approx_type == Driver.ApproxTypes.SAMP: # fp16_perf2_energy
            approx_type_str = None
            if approx_type == Driver.ApproxTypes.PERF:
                approx_type_str = "perf"
            elif approx_type == Driver.ApproxTypes.SAMP:
                approx_type_str = "samp"
            if curr_layer == Driver.PrecisionTypes.FP32:
                print("in fp32", approx_type_str)
                time_key = "fp32_%s%s_time" % (approx_type_str, knob_number)
                energy_key = "fp32_%s%s_energy" % (approx_type_str, knob_number)
            elif curr_layer == Driver.PrecisionTypes.FP16:
                print("in fp16", approx_type_str)
                time_key = "fp16_%s%s_time" % (approx_type_str, knob_number)
                energy_key = "fp16_%s%s_energy" % (approx_type_str, knob_number)
        else: # None for now
            if curr_layer == Driver.PrecisionTypes.FP32:
                time_key = "fp32_time"
                energy_key = "fp32_energy"
            elif curr_layer == Driver.PrecisionTypes.FP16:
                time_key = "fp16_time"
                energy_key = "fp16_energy"
        #print(time_key, energy_key)
        conversion_time = tensor_info[time_key]
        conversion_energy = tensor_info[energy_key]
        #print(conversion_time, conversion_energy)
        print("GPU: (%f, %f)" % (conversion_time, conversion_energy))
        return conversion_time, conversion_energy
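    # Key construction example (the knob number "2" is hypothetical; it is whatever the
    # configuration line supplies): curr_layer=FP16, approx_type=PERF, knob_number="2"
    # selects "fp16_perf2_time"/"fp16_perf2_energy", while a plain FP16 operation with no
    # approximation falls through to "fp16_time"/"fp16_energy".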
    def __write_output(self):
        config_file = open(self.__config_filename, "r")
        results_file = open(self.__results_filename, "w")

        def write_conf_to_file(conf_name, final_conf, time_speedup, energy_speedup):
            # conf = [layer value if promise], [tensor vals if gpu]]
            conf_str = ["+++++"]
            # process the first line
            first_line, layers = final_conf
            first_line_lst = first_line.split(' ')
            assert first_line_lst[0] == conf_name
            new_header = [conf_name]
            new_header.append(repr(time_speedup))
            new_header.append(repr(energy_speedup))
            new_header.append(first_line_lst[-1])
            new_header.append(first_line_lst[-2])
            conf_str.append(' '.join(new_header))
            for ind, layer in enumerate(layers):
                layer_lst = [str(ind + 1)]
                for op_time, op_energy, tensor_op in layer:
                    layer_lst.append(tensor_op)
                conf_str.append(' '.join(layer_lst))
            conf_str.append("-----\n")
            results_file.write('\n'.join(conf_str))
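        # Illustrative block emitted by write_conf_to_file (hypothetical values): for an
        # original header "conf2 1.5 1.5 84.2 83.9" with time_speedup=1.25 and
        # energy_speedup=1.2, the output is the following; note that the last two header
        # fields come out in reverse order, exactly as new_header is built above:
        #   +++++
        #   conf2 1.25 1.2 83.9 84.2
        #   1 promise swing 7
        #   2 conv fp16 12 add fp16 12
        #   -----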
        baseline_conf = None
        baseline_total_time = baseline_total_energy = 0

        def get_baseline_times_enegies():
            curr_time = curr_energy = 0
            for layer in baseline_conf[1]:
                for op_time, op_energy, tensor_op in layer:
                    curr_time += op_time
                    curr_energy += op_energy
            return curr_time, curr_energy

        def get_final_times_energies_conf(curr_conf):
            final_time = final_energy = 0
            final_conf = []
            for layer_ind, layer in enumerate(curr_conf[1]):
                final_conf_layer = []
                for tensor_ind, (op_time, op_energy, tensor_op) in enumerate(layer):
                    baseline_time, baseline_energy, baseline_op = baseline_conf[1][layer_ind][tensor_ind]
                    final_tensor_op = tensor_op
                    if op_time > baseline_time:
                        final_time += baseline_time
                        final_tensor_op = baseline_op
                    else:
                        final_time += op_time
                    # Ignoring bigger energies for now
                    '''
                    if op_energy > baseline_energy:
                        print("BIGGER ENERGY")
                        final_energy += baseline_energy
                        final_tensor_op = baseline_op
                    else:
                        final_energy += op_energy
                    '''
                    final_energy += op_energy
                    final_conf_layer.append((None, None, final_tensor_op)) # Don't care about the times and energies when writing
                final_conf.append(final_conf_layer)
            return final_time, final_energy, (curr_conf[0], final_conf)
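        # Worked example of the clamping above (numbers are made up): with a baseline op of
        # (4.0, 9.0, "conv fp32 1") and a candidate op of (5.0, 7.0, "conv fp16 12"), the
        # candidate is slower, so 4.0 and the baseline op string are kept; the candidate's
        # energy (7.0) is still added as-is because the energy comparison is commented out.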
        for line in config_file:
            if line.startswith("conf"):
                orig_line_lst = line.split(' ')
                conf_name = orig_line_lst[0]
                if not baseline_conf:
                    baseline_conf = self.__conf_results[conf_name]
                    print("FOUND baseline", baseline_conf)
                    baseline_total_time, baseline_total_energy = get_baseline_times_enegies()
                    results_file.write("%s\n" % repr(baseline_total_time)) # write baseline time to top of file
                    write_conf_to_file(conf_name, baseline_conf, 1, 1)
                else:
                    curr_conf = self.__conf_results[conf_name]
                    final_time, final_energy, curr_conf = get_final_times_energies_conf(curr_conf)
                    assert(final_time <= baseline_total_time)
                    #assert(final_energy <= baseline_total_energy)
                    write_conf_to_file(conf_name, curr_conf, final_time / baseline_total_time, final_energy / baseline_total_energy)
        results_file.close()
        config_file.close()
if __name__ == "__main__":
if len(sys.argv) != 5:
print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
exit(1)
Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()
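A minimal usage sketch, mirroring the argument check in the __main__ block above (the file names are placeholders, and the module name assumes this script is saved as driver.py, as the usage string suggests):

    # from the shell:
    #   python driver.py layer_info.txt tensor_table.txt confs.txt results.txt
    # or equivalently from Python:
    from driver import Driver
    Driver("layer_info.txt", "tensor_table.txt", "confs.txt", "results.txt").driver()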