Skip to content
Snippets Groups Projects
Commit 16a9a7ff authored by kotsifa2's avatar kotsifa2
Browse files

Merge branch 'approx_hpvm' of gitlab.engr.illinois.edu:llvm/hpvm into approx_hpvm

parents 00277bd7 db194ee5
No related branches found
No related tags found
No related merge requests found
......@@ -6,10 +6,25 @@
* CUDA-9.0 or above
* CUBLAS-9.0 or above - often included with cuda-toolkit
## Dependent Library Builds
```shell
cd ../gpu_profiler
mkdir lib
cmake ../
make
cd ../soc_simulator
mkdir lib
cmake ../
make
```
## BUILD
```shell
source bin/setup_runtime_paths.sh
source bin/setup_cuda_llvm_paths.sh
mkdir build
cd build
cmake ../
......
#!/bin/bash
# CUDNN Path setup
module load cuda-toolkit/9.1
export CUDA_INCLUDE_PATH=/software/cuda-9.1/include
export CUDNN_PATH=/software/cuda-9.1/lib64/
export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH
export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH
# HPVM Path setup
export CPATH=$CPATH:/home/hsharif3/anaconda2/include/
export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH
export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/
export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/
export LIBRARY_PATH=$LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-9.0/targets/aarch64-linux/lib/
export CUDNN_PATH=/usr/local/cuda-9.0/
export CUDA_INCLUDE_PATH=${CUDNN_PATH}/include
export LLVM_BUILD_ROOT=/home/nvidia/Gitlab/hpvm/build/
export LLVM_SRC_ROOT=/home/nvidia/Gitlab/hpvm/llvm/
......@@ -65,7 +65,7 @@ class TableGenerator:
3. Writes the internal table to <network_name>_tensors.txt file and uses the
<network_name>_ops.txt file as a guideline in terms of row order
'''
#self.__run_inputted_binaries()
self.__run_inputted_binaries()
self.__build_internal_table()
self.__output_table_to_file()
......
......@@ -83,7 +83,23 @@ def getConfidence(accuracy_outfile, acc_threshold):
return conf, avg_acc
def getMinAccuracy(accuracy_outfile):
    """Return the smallest accuracy value recorded in *accuracy_outfile*.

    The file is expected to hold one floating-point accuracy per line.
    Blank lines are ignored.

    Args:
        accuracy_outfile: path of the text file to read.

    Returns:
        The minimum accuracy found in the file, as a float.

    Raises:
        ValueError: if the file contains no values, or a non-blank line
            cannot be parsed as a float.
    """
    # The context manager guarantees the handle is closed even when a line
    # fails to parse.
    with open(accuracy_outfile, "r") as f:
        acc_list = [float(line.strip()) for line in f if line.strip()]

    return min(acc_list)
# NOTE: invokes the binary with the number of runs
def do_multiple_runs2(binary_name, accuracy_threshold, confidence_threshold):
......
......@@ -5,6 +5,9 @@ import shutil
from measure_confidence2 import getConfigCost
AL_THRESHOLD = 0.1
class Config:
def __init__(self):
self.avg_accuracy = 0
......@@ -69,10 +72,6 @@ def loadConfigData(result_dir, layer_costs, baseline_accuracy):
AL_THRESHOLD = 0.1
SPEEDUP_BAND_SIZE = 0.3
ENERGY_BAND_SIZE = 10
class Configuration:
def __init__(self, name, speedup, energy, accuracy, accuracy_loss):
......@@ -223,11 +222,17 @@ def findParetoConfigs(base_dir, layer_costs, accuracy):
config = Configuration(config.fname , config.speedup, 100, config.avg_accuracy, config.avg_loss)
config_list.append(config)
SPEEDUP_BAND_SIZE = 1.0
ENERGY_BAND_SIZE = 10
if len(config_list) < 30:
SPEEDUP_BAND_SIZE = 1.2
# No Pareto Selection if list is < 50 configurations
if len(config_list) < 50:
SPEEDUP_BAND_SIZE = 100 # Include all in Pareto Frontier
print ("*SPEEDUP_BAND_SIZE = ", SPEEDUP_BAND_SIZE)
ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE)
......
......@@ -22,6 +22,7 @@ import threading
import psutil
from measure_confidence2 import dump_promise_confidence_files3
from measure_confidence2 import getConfidence, getMinAccuracy
from select_top_results import select_top_results
from time import sleep
from pareto_curve import findParetoConfigs
......@@ -169,25 +170,36 @@ class ClangFlagsTuner(MeasurementInterface):
createFlagsFile("promise_flags", cfg)
run_cmd = binary_name
print "binary_name = ", run_cmd
print "\nbinary_name = ", run_cmd
#run_result_call_program = self.call_program(run_cmd)
#print "returned \n\n"
total_runs = 2
FNULL = open(os.devnull, 'wb')
p = subprocess.Popen(run_cmd, stdout = FNULL)
#p = subprocess.Popen(run_cmd, stdout = FNULL)
p = subprocess.Popen([run_cmd, str(total_runs)], stdout = FNULL)
p.wait()
accuracy = getAccuracy("final_accuracy")
# Get Confidence for multiple runs
conf, avg_acc = getConfidence("run_accuracies.txt", accuracy_threshold)
# getConfigCost returns the cost associated with the selected configuration
total_comps = getConfigCost(cfg)
Result = opentuner.resultsdb.models.Result()
Result.time = total_comps
Result.accuracy = accuracy
if accuracy > accuracy_threshold:
#Result.accuracy = accuracy
min_accuracy = getMinAccuracy("run_accuracies.txt")
print ("min_accuracy = ", min_accuracy)
Result.accuracy = min_accuracy
# Only pass conf if conf == 100
if min_accuracy > accuracy_threshold and conf == 100:
print ("conf = ", conf, " avg_acc = ", avg_acc)
#if accuracy not in evaluated_configs:
config_tuple = (total_comps, accuracy, cfg)
self.configs_list.append(config_tuple)
......@@ -199,8 +211,6 @@ class ClangFlagsTuner(MeasurementInterface):
f_acc.close()
print "done with one run"
test_id += 1
return Result
......
......@@ -7,6 +7,9 @@ void dummyFunction(){
void* initRT = (void*) &llvm_hpvm_initTensorRt;
void* cleanRT = (void*) &llvm_hpvm_cleanupTensorRt;
void* initApproxRT = (void*) &llvm_hpvm_initApproxhpvmRt;
void* cleanApproxRT = (void*) &llvm_hpvm_cleanupApproxhpvmRt;
void* initRTController = (void*) &llvm_hpvm_initializeRuntimeController;
void* cleanRTController = (void*) &llvm_hpvm_clearRuntimeController;
......
from collections import defaultdict
import os
import subprocess
import sys
class Driver:
    """Estimate total time and energy for a set of approximation configurations.

    The driver reads three input files:
      * a layer file (one comma-separated layer description per line),
      * a tensor table (per-layer blocks of per-operation time/energy columns),
      * a configuration file (blocks delimited by "+++++" and "-----" lines
        that say how each layer is run),
    accumulates time and energy per configuration, and writes a summary to a
    results file.
    """

    # Threshold used by is_promise(): a config line whose first
    # space-separated field parses to a number below this value is PROMISE.
    fp16_swing = 8

    class ApproxTypes:
        """Integer tags for the way a layer / tensor operation is executed."""
        FP16 = 0
        FP32 = 1
        PROMISE = 2
        PERF = 3

    # Keys of the two aggregate result tables (also used as section titles
    # in the results file).
    results_time_key = "Time"
    results_energy_key = "Energy"

    def driver(self):
        """Run the whole pipeline: parse inputs, simulate, write results."""
        self.__parse_tensor_layer_file()
        self.__parse_tensor_table()
        self.__run_simulations()
        self.__display_results()

    def __init__(self, layer_filename, table_filename, config_filename, results_filename):
        """Store the file names and create the empty internal tables.

        Args:
            layer_filename: path of the layer description file.
            table_filename: path of the tensor table file.
            config_filename: path of the configuration file.
            results_filename: path the results are written to.
        """
        self.__layer_filename = layer_filename
        self.__table_filename = table_filename
        self.__config_filename = config_filename
        self.__results_filename = results_filename

        # NOTE: Use an OrderedDict if we want to search by operation name
        # Using a list bc we care about the order the data is read in
        # since it corresponds to the data in the configuration file
        self.__tensor_layers = []

        # [layer_name][operation index][col name]
        # Operations are stored in a list so that insertion order is kept.
        self.__tensor_table = defaultdict(list)

        # [Time/Energy][number corresponding to order the layer config was read in] = time/energy
        self.__aggregate_results = defaultdict(lambda: defaultdict(float))
        self.__config_count = 0

    @staticmethod
    def is_conv(operation_name):
        """Return True if the name starts with "Conv"."""
        return operation_name.startswith("Conv")

    @staticmethod
    def is_nml(operation_name):
        """Return True if the name starts with "NML"."""
        return operation_name.startswith("NML")

    @staticmethod
    def is_fc(operation_name):
        """Return True if the name starts with "FC"."""
        return operation_name.startswith("FC")

    def __parse_tensor_layer_file(self):
        """Read the layer file into self.__tensor_layers (order preserved).

        Exits the process with status 1 if the file is missing or a layer
        name is not a Conv, FC or NML layer. Blank lines are skipped.
        """
        if not os.path.isfile(self.__layer_filename):
            print("ERROR: %s was not found." % self.__layer_filename)
            sys.exit(1)

        with open(self.__layer_filename, "r") as layer_file:
            for line in layer_file:
                line = line.strip()
                if not line:  # tolerate blank / trailing empty lines
                    continue

                layer_data = line.split(',')
                layer_name = layer_data[0]

                tensor_layer = defaultdict(str)
                tensor_layer["Name"] = layer_name

                if Driver.is_conv(layer_name):
                    tensor_layer["N"] = float(layer_data[1])
                    tensor_layer["Cin"] = float(layer_data[2])
                    tensor_layer["H"] = float(layer_data[3])
                    tensor_layer["W"] = float(layer_data[4])
                    tensor_layer["Cout"] = float(layer_data[5])
                    # Field 6 of a Conv line is intentionally not read here.
                    tensor_layer["Kh"] = float(layer_data[7])
                    tensor_layer["Kw"] = float(layer_data[8])
                    tensor_layer["Sh"] = float(layer_data[9])
                    tensor_layer["Sw"] = float(layer_data[10])

                elif Driver.is_fc(layer_name):
                    tensor_layer["RA"] = float(layer_data[1])
                    tensor_layer["CA"] = float(layer_data[2])
                    tensor_layer["RB"] = float(layer_data[3])
                    tensor_layer["CB"] = float(layer_data[4])

                elif not Driver.is_nml(layer_name):  # TODO should we store data for NMLs?
                    print("ERROR: Invalid layer name %s" % layer_name)
                    sys.exit(1)

                self.__tensor_layers.append(tensor_layer)

    def __parse_tensor_table(self):
        """Read the tensor table into self.__tensor_table.

        The file is a sequence of blocks. Each block starts with a header
        line "** <layer name> <number of ops> <col names...>" followed by
        one line per operation: "<op name> <one number per column>".
        Parsing stops at the first empty line or at end of file.
        Exits the process with status 1 if the file is missing.
        """
        if not os.path.isfile(self.__table_filename):
            print("ERROR: %s was not found." % self.__table_filename)
            sys.exit(1)

        with open(self.__table_filename, "r") as table_file:
            line = table_file.readline().strip()

            while line:
                # Line here MUST be a header or there's a bug
                # Get the description of the layer
                assert(line.startswith("**"))

                header_contents = line.split(' ')[1:]
                layer_name = header_contents[0]
                num_ops = int(header_contents[1])
                col_names = header_contents[2:]

                layer_operations = []

                # Go through all operations in the layer
                for _ in range(num_ops):
                    operation_data = defaultdict(str)

                    line = table_file.readline().strip()
                    op_data = line.split(' ')
                    operation_data["Name"] = op_data[0]

                    # Number of data items (#s) needs to match up with the # of cols
                    assert(len(op_data) - 1 == len(col_names))

                    # Store each column value under its column name
                    for col_name, value in zip(col_names, op_data[1:]):
                        operation_data[col_name] = float(value)

                    layer_operations.append(operation_data)

                self.__tensor_table[layer_name] = layer_operations
                line = table_file.readline().strip()

    @staticmethod
    def is_promise(config_layer):
        """Return True if the first field of the config line is below fp16_swing."""
        return float(config_layer.split(' ')[0]) < Driver.fp16_swing

    def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data):
        """Return the (time, energy) cost of converting between precisions.

        Args:
            curr_layer: ApproxTypes tag of the operation about to run.
            prev_layer: ApproxTypes tag of the operation that ran before it.
            h2f_f2h_operation_ind: index of the operation row (within the
                layer's tensor table) whose h2f/f2h columns are used.
            layer_data: parsed layer record; only "Name" is read.

        Returns:
            (0.0, 0.0) when no conversion is charged, otherwise the h2f
            columns when moving to FP32 or the f2h columns when moving to
            FP16.
        """
        if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \
                or prev_layer == Driver.ApproxTypes.PROMISE:  # No quantization needed
            return 0.0, 0.0

        layer_name = layer_data["Name"]

        # NOTE: Ignoring logic where curr == promise or prev == promise bc
        # smartDMA is always true so we'd return near the beginning of the method

        # Order matters in the tensor table: rows are looked up by position.
        print(layer_name, self.__tensor_table[layer_name])
        tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind]

        if curr_layer == Driver.ApproxTypes.FP32:
            time = tensor_op_row["h2f_time"]
            energy = tensor_op_row["h2f_energy"]
        elif curr_layer == Driver.ApproxTypes.FP16:
            time = tensor_op_row["f2h_time"]
            energy = tensor_op_row["f2h_energy"]
        else:
            # NOTE(review): the table has no conversion columns for PERF, so
            # no cost is charged here instead of failing on unbound locals.
            # Confirm whether PERF should be charged as FP16 or FP32.
            return 0.0, 0.0

        print("Quantization: (%f, %f)" % (time, energy))
        return (time, energy)

    def __run_promise_simulation(self, swing, layer_data):
        """Run the external "./ptm" simulator for one layer.

        Args:
            swing: integer swing value passed through to the simulator.
            layer_data: parsed layer record (Conv or FC).

        Returns:
            (time, energy) as floats, parsed from the simulator's stdout,
            which must be exactly two comma-separated values.

        Exits the process with status 1 if the layer is neither Conv nor FC.
        """
        layer_name = layer_data["Name"]
        patch_factor = 1

        if Driver.is_conv(layer_name):
            rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \
                    / (layer_data["Sh"] * layer_data["Sw"])
            cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"]
            rows_b = cols_a
            cols_b = layer_data["Cout"]
            patch_factor = layer_data["Kh"] * layer_data["Kw"]
        elif Driver.is_fc(layer_name):
            rows_a = layer_data["RA"]
            cols_a = layer_data["CA"]
            rows_b = cols_a
            cols_b = layer_data["CB"]
        else:
            print("PROMISE can't run whatever this layer is.")
            sys.exit(1)

        # Run promise simulator
        # TODO need to print time and energy in the ptm runner so we can pipe it
        # universal_newlines=True makes stdout a text string on Python 3 as
        # well, so the split(',') below works on both Python 2 and 3.
        output = subprocess.Popen(
            ["./ptm", str(rows_a), str(cols_a), str(rows_b),
             str(cols_b), str(patch_factor), str(swing)],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True).communicate()[0]
        total_time_energy = output.strip().split(',')

        assert(len(total_time_energy) == 2)
        print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1]))
        return float(total_time_energy[0]), float(total_time_energy[1])

    def __run_simulations(self):
        """Walk the configuration file and accumulate time/energy per config.

        Each configuration block is:
            +++++
            <configuration name ...>
            <one line per layer>
            -----
        A layer line's second field selects "promise" or "gpu". For "gpu"
        the remaining fields are triples of
        (tensor op, approx type, approx number).
        Exits the process with status 1 if the file is missing.
        """
        if not os.path.isfile(self.__config_filename):
            print("ERROR: %s was not found" % self.__config_filename)
            sys.exit(1)

        time_totals = self.__aggregate_results[Driver.results_time_key]
        energy_totals = self.__aggregate_results[Driver.results_energy_key]

        with open(self.__config_filename, "r") as config_file:
            line = config_file.readline().strip()

            while line:
                assert(line.startswith("+++++"))
                # Next line = configuration name; it is consumed but not used.
                config_file.readline()

                print("CONFIGURATION")
                line = config_file.readline().strip()

                layer_ind = 0  # NOTE can also use the leftmost number in the current line
                # Every configuration starts from an FP32 input.
                prev_layer = Driver.ApproxTypes.FP32
                curr_layer = None

                while not line.startswith("-----"):
                    layer_info = line.split(' ')
                    layer_data = self.__tensor_layers[layer_ind]
                    layer_name = layer_data["Name"]

                    if layer_info[1] == "promise":
                        print("Running layer %s on PROMISE" % layer_name)
                        curr_layer = Driver.ApproxTypes.PROMISE
                        swing = int(layer_info[3])
                        time, energy = self.__run_promise_simulation(swing, layer_data)
                        print(time, energy)
                        time_totals[self.__config_count] += time
                        energy_totals[self.__config_count] += energy
                        # Remember that the previous layer ran on PROMISE so
                        # the next operation is not charged a conversion.
                        prev_layer = curr_layer

                    elif layer_info[1] == "gpu":
                        # Parse each individual tensor operation
                        # TODO not portable bc there can be multiple numbers after each approx later on
                        total_time = 0
                        total_energy = 0

                        for tensor_ind, i in enumerate(range(2, len(layer_info), 3)):
                            approx_type = layer_info[i + 1]
                            approx_num = layer_info[i + 2]  # only matters if perf

                            if approx_type == "fp16":
                                curr_layer = Driver.ApproxTypes.FP16
                            elif approx_type == "fp32":
                                curr_layer = Driver.ApproxTypes.FP32
                            elif approx_type == "perf":
                                curr_layer = Driver.ApproxTypes.PERF
                            else:
                                assert(False)

                            quant_time, quant_energy = self.__quantize(
                                curr_layer, prev_layer, tensor_ind, layer_data)
                            time, energy = self.__run_gpu_simulation(
                                curr_layer, layer_name, tensor_ind, approx_num)

                            # The conversion cost is part of the operation's cost.
                            total_time += quant_time + time
                            total_energy += quant_energy + energy
                            prev_layer = curr_layer

                        time_totals[self.__config_count] += total_time
                        energy_totals[self.__config_count] += total_energy

                    layer_ind += 1
                    line = config_file.readline().strip()

                self.__config_count += 1
                line = config_file.readline().strip()

    def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind, approx_num):
        """Look up the (time, energy) of one tensor operation in the table.

        Args:
            curr_layer: ApproxTypes tag (FP32, FP16 or PERF).
            layer_name: key of the layer in the tensor table.
            tensor_ind: position of the operation within the layer.
            approx_num: suffix of the perf columns ("perf<approx_num>_time"
                / "perf<approx_num>_energy"); only used for PERF.

        Raises:
            ValueError: if curr_layer is not FP32, FP16 or PERF.
        """
        tensor_info = self.__tensor_table[layer_name][tensor_ind]

        if curr_layer == Driver.ApproxTypes.FP32:
            time = tensor_info["fp32_time"]
            energy = tensor_info["fp32_energy"]
        elif curr_layer == Driver.ApproxTypes.FP16:
            time = tensor_info["fp16_time"]
            energy = tensor_info["fp16_energy"]
        elif curr_layer == Driver.ApproxTypes.PERF:
            time = tensor_info["perf%s_time" % approx_num]
            energy = tensor_info["perf%s_energy" % approx_num]
        else:
            raise ValueError("Unsupported approximation type: %r" % (curr_layer,))

        print("GPU: (%f, %f)" % (time, energy))
        return time, energy

    def __display_results(self):
        """Write the per-configuration totals to the results file.

        For "Time" and then "Energy" the file gets a title line, a CSV
        header, one "c<index>,<total>,<improvement>" row per configuration
        (improvement is relative to configuration 0) and finally the
        configuration with the smallest total.
        """
        with open(self.__results_filename, "w") as results_file:
            attributes_to_print = [Driver.results_time_key, Driver.results_energy_key]

            for attribute in attributes_to_print:
                results_file.write("%s\n" % attribute)
                results_file.write("Configuration,Total,Improvement\n")

                # Configuration 0 is the baseline every row is compared to.
                baseline_val = self.__aggregate_results[attribute][0]
                print(baseline_val)
                best_config = None
                best_result = None

                for config_ind in range(self.__config_count):
                    results_file.write("c%d" % config_ind)
                    time_or_energy_val = self.__aggregate_results[attribute][config_ind]

                    # Using repr to keep all decimal digits when writing to file
                    results_file.write(",%s" % repr(time_or_energy_val))
                    # The small epsilon avoids a division by zero.
                    results_file.write(",%s\n" % repr(baseline_val / (time_or_energy_val + 0.0001)))

                    # Compare against None explicitly: a total of 0.0 is a
                    # valid best value and must not be treated as "unset".
                    if best_result is None or time_or_energy_val < best_result:
                        best_result = time_or_energy_val
                        best_config = config_ind

                # With no configurations there is no best entry to report.
                if best_config is not None:
                    results_file.write("\nc%d,%s\n\n" % (best_config, repr(self.__aggregate_results[attribute][best_config])))
if __name__ == "__main__":
    # The script takes exactly four positional arguments after its own name:
    # layer info, tensor info, configurations and the results file.
    cli_args = sys.argv[1:]
    if len(cli_args) != 4:
        print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>")
        exit(1)

    layer_path, table_path, config_path, results_path = cli_args
    simulation = Driver(layer_path, table_path, config_path, results_path)
    simulation.driver()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment