diff --git a/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py b/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py deleted file mode 100644 index 8ac059ba0d0ac16dc354a367810dce5a31a15fc0..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/automated_tests.py +++ /dev/null @@ -1,136 +0,0 @@ - - -import os -import sys -from tuner_src import benchmarks -import subprocess - - -def readAccuracy(): - - f = open("final_accuracy") - acc_str = f.read() - return acc_str - - -def executeAndDumpOutput(binary_path): - - run_cmd = "./" + binary_path - output_file_path = "./test_dumps/" + binary_path - output_file = open(output_file_path, "a+") - - p = subprocess.Popen(run_cmd, shell=True, stdout=output_file) - retcode = p.wait() - - output_file.close() - - accuracy = readAccuracy() - print ("accuracy = ", accuracy) - - return retcode - - - - -def runTensorBinaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.tuner_binary) - - retcode = executeAndDumpOutput(bench.tuner_binary) - - if retcode != 0: - failed_progs.append(bench.tuner_binary) - - return failed_progs - - - -def runLayerBinaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.promise_binary) - - retcode = executeAndDumpOutput(bench.promise_binary) - - if retcode != 0: - failed_progs.append(bench.promise_binary) - - return failed_progs - - - -def runFp16Binaries(test_benchmarks): - - # List of programs that faile during execution - For reporting - failed_progs = [] - for bench_id in test_benchmarks: - bench = test_benchmarks[bench_id] - print ("bench = ", bench.fp16_binary) - - retcode = executeAndDumpOutput(bench.fp16_binary) - - if retcode != 0: - failed_progs.append(bench.tuner_binary) - - return failed_progs - - - - -def runTests(test_benchmarks): - - if not os.path.exists("test_dumps"): - os.mkdir("test_dumps") - - tensor_failed_progs = runTensorBinaries(test_benchmarks) - layer_failed_progs = runLayerBinaries(test_benchmarks) - fp16_failed_progs = runFp16Binaries(test_benchmarks) - - failed_progs = tensor_failed_progs + layer_failed_progs + fp16_failed_progs - - total_tests = len(test_benchmarks) * 3 - succesful_tests = total_tests - len(failed_progs) - - - print ("\n\n\n **** Results Summary ***** \n\n\n") - - print ("Total_Tests = ", total_tests, "\n") - print ("Successful_Tests = ", succesful_tests, "\n") - print ("Failed_Tests = ", total_tests - succesful_tests, "\n") - - print ("\n\n --- Failing Tests = ", tensor_failed_progs + layer_failed_progs) - - print ("\n *Per-process logs dumped to ./test_dumps/") - - - -def checkEnvironment(): - - if not "CUDA_INCLUDE_PATH" in os.environ: - print ("ERROR: CUDA_INCLUDE_PATH NOT SET!") - sys.exit(0) - - if not "CUDNN_PATH" in os.environ: - print ("ERROR: CUDA_PATH NOT SET!") - sys.exit(0) - - - if not os.path.exists("promise_flags"): - print ("promise_flags NOT found -- CREATE promise_flags with flag assignment per-layer") - sys.exit(0) - - -if __name__ == "__main__": - - checkEnvironment() - - runTests(benchmarks.bench_tuner_data) - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py b/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py deleted file mode 100644 index 6e59b72f023a7869e721ba62f923f5e4ca791113..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/compute_install_times.py +++ /dev/null @@ -1,116 +0,0 @@ - - -class TuningParameters: - def __init__(self): - self.iterations_measured = 150 - self.total_iterations = 30000 - - -tunerParams = TuningParameters() - - -class Benchmark: - def __init__(self): - self.binary_time = 0 - - -### All times are real profiled times on the Jetson Board -### Times are for 150 OpenTuner iterations on Jetson - -ResNet50 = Benchmark() -ResNet50.tuner_time = 3.85 * 100 * 150 # 50 images * 100 batches - -VGG16_ImageNet = Benchmark() -VGG16_ImageNet.tuner_time = 4.55 * 100 * 150 # 50 images * 100 batches - -AlexNet_ImageNet = Benchmark() -AlexNet_ImageNet.tuner_time = 0.7 * 100 * 150 - - -VGG16_CIFAR10 = Benchmark() -VGG16_CIFAR10.tuner_time = 1.54 * 60 * 60 # 50 images * 100 batches - - -VGG16_CIFAR100 = Benchmark() -VGG16_CIFAR100.tuner_time = 1.57 * 60 * 60 # 50 images * 100 batches - - -ResNet18 = Benchmark() -ResNet18.tuner_time = 0.52 * 60 * 60 # 12.9 measured for 1000 images - - -MobileNet = Benchmark() -MobileNet.tuner_time = 0.72 * 60 * 60 # 50 images * 100 batches - - -AlexNet_CIFAR10 = Benchmark() -AlexNet_CIFAR10.tuner_time = 0.67 * 60 * 60 # Time in hours - - -AlexNet2_CIFAR10 = Benchmark() -AlexNet2_CIFAR10.tuner_time = 0.19 * 60 * 60 - - -LeNet_CIFAR10 = Benchmark() -LeNet_CIFAR10.tuner_time = 0.11 * 60 * 60 - - - - - -def getInstallTime(Bench): - - ## We limit pareto configs to 50 after iterations of tuning complete - - tuner_invocations = tunerParams.total_iterations / tunerParams.iterations_measured - - extrapolated_time = tuner_invocations * Bench.tuner_time - - time_hours = extrapolated_time / (60 * 60) - - return time_hours - - - -# Routine to compute extrapolated tuning times -def computeExtrapolatedInstallTime(): - - - resnet50_time = getInstallTime(ResNet50) - print ("*** ResNet50 time (hrs) = ", resnet50_time) - - resnet18_time = getInstallTime(ResNet18) - print ("*** ResNet18 time (hrs) = ", resnet18_time) - - mobilenet_time = getInstallTime(MobileNet) - print ("*** MobileNet time (hrs) = ", mobilenet_time) - - vgg16_img_time = getInstallTime(VGG16_ImageNet) - print ("*** VGG16-Imagenet time (hrs) = ", vgg16_img_time) - - vgg16_cifar10_time = getInstallTime(VGG16_CIFAR10) - print ("*** VGG16-CIFAR10 time (hrs) = ", vgg16_cifar10_time) - - vgg16_cifar100_time = getInstallTime(VGG16_CIFAR100) - print ("*** VGG16-CIFAR100 time (hrs) = ", vgg16_cifar100_time) - - alexnet_img_time = getInstallTime(AlexNet_ImageNet) - print ("*** AlexNet-Imagenet time (hrs) = ", alexnet_img_time) - - alexnet_cifar10_time = getInstallTime(AlexNet_CIFAR10) - print ("*** AlexNet-CIFAR10 time (hrs) = ", alexnet_cifar10_time) - - alexnet2_cifar10_time = getInstallTime(AlexNet2_CIFAR10) - print ("*** AlexNet2-CIFAR10 time (hrs) = ", alexnet2_cifar10_time) - - lenet_cifar10_time = getInstallTime(LeNet_CIFAR10) - print ("*** LeNet-CIFAR10 time (hrs) = ", lenet_cifar10_time) - - - - - -if __name__ == "__main__": - - computeExtrapolatedInstallTime() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py b/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py deleted file mode 100644 index 9f2ffb3eacd3cb81bcefb4b44a48f1d0a8a8356d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/error_sensitivity.py +++ /dev/null @@ -1,139 +0,0 @@ - - -import subprocess -import os -import operator - - -def constructTunerFile(num_flags, tensor_id, error_level, default_error): - - f = open("opentuner_flags", "w+") - - for i in range(num_flags): - if i == tensor_id: - f.write(str(error_level) + "\n") - else: - f.write(str(default_error) + "\n") - - f.close() - - - -def runAndTestError(binary_name, gold_acc): - - num_runs = 20 - - binary_name = "./" + binary_name - FNULL = open(os.devnull, 'wb') - p = subprocess.Popen([binary_name, str(num_runs)], stdout = FNULL) - p.wait() - - f = open("run_accuracies.txt") - - total_err = 0.0 - for x in f: - acc = float(x.strip()) - total_err += (gold_acc - acc) - - avg_err = total_err / num_runs - - return avg_err - - - - -def test_sensitivity(Bench): - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - num_flags = Bench.num_flags - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 0) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_errors_1000.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_errors_ranked_1000.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() - - - -def test_sensitivity2(Bench): - - num_flags = Bench.num_flags - - constructTunerFile(num_flags, 0, 3, 3) - error = runAndTestError(Bench.tuner_binary, Bench.tuner_accuracy) - - ref_acc = Bench.tuner_accuracy - error - print ("*** Gold accuracy = ", Bench.tuner_accuracy, " Ref accuracy = ", ref_acc, " *** \n\n") - - - tensor_errors = [] - - error_levels = [6, 9, 12, 15] - - for tensor_id in range(num_flags): - total_error = 0 - for error_level in error_levels: - constructTunerFile(num_flags, tensor_id, error_level, 3) - error = runAndTestError(Bench.tuner_binary, ref_acc) - print (tensor_id, error_level, error) - total_error += error - - avg_error = total_error / len(error_levels) - - tensor_errors.append([tensor_id, avg_error]) - - - print ("\n\n*** Per-Tensor Avg Errors \n\n") - - f_name = Bench.base_dir + "/tensor_composite_errors.txt" - f = open(f_name, "w+") - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - f.write(str(i) + "\t" + str(tensor_errors[i][1]) + "\n") - - f.close() - - f_name = Bench.base_dir + "/tensor_composite_errors_ranked.txt" - f2 = open(f_name, "w+") - tensor_errors.sort(key=operator.itemgetter(1)) - - - for i in range(len(tensor_errors)): - print (i, tensor_errors[i][1]) - - f2.write(str(tensor_errors[i][0]) + "\t" + str(tensor_errors[i][1]) + "\n") - - - f2.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py b/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py deleted file mode 100644 index bae38bf7e497897ae3db4e12dce48914903739fb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/exhaustive.py +++ /dev/null @@ -1,140 +0,0 @@ - -import os -import sys -import shutil -import subprocess -import shutil - - - -class Benchmark: - def __init__(self): - self.binary = "" - self.num_flags = 4 - - - -Alexnet1 = Benchmark() -Alexnet1.binary = "./lenet_keras_promise" -Alexnet1.accuracy = 98.8 -Alexnet1.flags = [[8], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4] ] - - -Alexnet2 = Benchmark() -Alexnet2.binary = "./fc4_clipped_promise" -Alexnet2.accuracy = 93.72 -Alexnet2.flags = [[3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7], [2, 3, 4, 5, 6, 7] ] - - - -def dumpConfig(conf_flags, dir_prefix, file_id): - - shutil.copy("promise_flags", dir_prefix + "/" + str(file_id) + ".txt") - - -def dumpFinalConfigs(final_confs, dir_prefix): - - f = open(dir_prefix + "/final_confs.txt", "w+") - for conf in final_confs: - ind = 0 - for flag in conf: - f.write(str(flag)) - if ind < len(conf) - 1: - f.write(",") - - ind += 1 - f.write("\n") - - f.close() - - -def getAccuracy(): - - file = open("final_accuracy", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - print accuracy - return accuracy - - - - -def testConfidence(binary, target_acc, total_runs): - - for i in range(total_runs): - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - acc = getAccuracy() - if acc < target_acc: - return False - - return True - - -def singleRun(binary): - - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - - return getAccuracy() - - - -def createPromiseFile(conf_flags): - - f = open("promise_flags", "w+") - for flag in conf_flags: - f.write(str(flag) + "\n") - f.close() - - - -def runExhaustive(Bench, threshold, dir_prefix): - - flags = Bench.flags - - accepted_confs = [] - ind = 0 - for flag1 in flags[0]: - for flag2 in flags[1]: - for flag3 in flags[2]: - for flag4 in flags[3]: - print (flag1, flag2, flag3, flag4) - conf_flags = [] - conf_flags.append(flag1) - conf_flags.append(flag2) - conf_flags.append(flag3) - conf_flags.append(flag4) - - createPromiseFile(conf_flags) - - accuracy = singleRun(Bench.binary) - target_acc = Bench.accuracy - threshold - - if accuracy > target_acc: - if testConfidence(Bench.binary, target_acc, 3): - dumpConfig(conf_flags, dir_prefix, ind) - accepted_confs.append(conf_flags) - - ind += 1 - - dumpFinalConfigs(accepted_confs, dir_prefix) - - - -if __name__ == "__main__": - - #runExhaustive(Alexnet1, 1.0, "lenet_1") - #runExhaustive(Alexnet1, 2.0, "lenet_2") - - runExhaustive(Alexnet2, 1.0, "fc4_1") - runExhaustive(Alexnet2, 2.0, "fc4_2") - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py b/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py deleted file mode 100644 index 0b7f09d92e91894d284b40cc0bd2d346c08e36c7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py +++ /dev/null @@ -1,42 +0,0 @@ - - -import sys - - -if __name__ == "__main__": - - f = open(sys.argv[1], "r") - f2 = open("quant_ranges.txt", "w+") - - layer_line = False - for x in f: - if "ConvLayer_PROMISE" in x or "FCLayer_PROMISE" in x or layer_line == True: - if layer_line == True: - layer_line = False - else: - layer_line = True - - print x - toks = x.split(",") - - for tok in toks: - tok = tok.strip() - tok_val = "" - try: - tok_val = float(tok) - try: - tok_val = int(tok) - except: - print (tok_val) - f2.write(str(tok_val) + " ") - #f2.write("tok_val = ", tok_val + " ") - except: - continue - - f2.write("\n") - - - f.close() - f2.close() - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py b/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py deleted file mode 100644 index e81cf10ece72c43457de718365bd2017e1684ab2..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/get_power_stats.py +++ /dev/null @@ -1,79 +0,0 @@ - -import sys -import numpy as np -import subprocess - - -def get_avg_power(f_name): - - f = open(f_name, "r") - - gpu_power = [] - ddr_power = [] - sys_power = [] - - for x in f: - toks = x.split() - - gpu_power.append(float(toks[1])) - ddr_power.append(float(toks[2])) - sys_power.append(float(toks[3])) - - - avg_gpu_power = np.mean(gpu_power) - avg_ddr_power = np.mean(ddr_power) - avg_sys_power = np.mean(sys_power) - - print ("** avg_gpu_power = ", avg_gpu_power, " avg_ddr_power = ", \ - avg_ddr_power, " avg_sys_power = ", avg_sys_power) - - return (avg_gpu_power, avg_ddr_power, avg_sys_power) - - -#avail_frequencies = [140250000, 229500000, 318750000, 408000000, 497250000, -# 586500000, 675750000, 765000000, 854250000, -# 943500000, 1032750000, 1122000000, 1211250000, 1300500000]; - - -avail_frequencies = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; - - -if __name__ == "__main__": - - - programs = ["alexnet_promise", "alexnet2_promise", "vgg16_cifar10_promise", "resnet18_promise", "resnet50_imagenet_promise", "mobilenet_promise", "vgg16_imagenet_promise"] - - for binary_path in programs: - - power_avgs = [] - power_freq_file = "power_data/" + binary_path + "/power_vals.txt" - fout = open(power_freq_file, "w+") - - for frequency in avail_frequencies: - - print (frequency) - poll_path = "./poll" - iterations = 10 - - poll_cmd = poll_path + " " + str(frequency) # sudo needed for frequency change - subprocess.call(poll_cmd, shell=True) - - - binary_path = "./" + binary_path - power_file = " power_data/" + binary_path + "/power.out." + str(frequency) - profile_cmd = "../../system_profiler/build/offline_profiler " + binary_path + " " + \ - str(iterations) + " tensor.out." + str(frequency) + power_file - - subprocess.call(profile_cmd, shell=True) - - - #avg_power = get_avg_power("power.out." + str(frequency)) - avg_power = get_avg_power(power_file) - power_avgs.append(avg_power) - - fout.write(str(avg_power[0]) + " " + str(avg_power[1]) + " " + str(avg_power[2]) + "\n") - print (avg_power) - - - print (power_avgs) - fout.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh b/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh deleted file mode 100644 index 33a54cd0de626113e5cf11e2f6a6928d4fa384eb..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/install_runtime.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -export HPVM_TENSOR_RT_HOME=/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/ -export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH - -clang++ -I/software/cuda-9.1/include -emit-llvm -c ${HPVM_TENSOR_RT_HOME}/tensor_runtime/include/tensor_signatures.cc -o ${HPVM_TENSOR_RT_HOME}/lib/tensor_runtime.bc -llvm-dis --version -llvm-dis ${HPVM_TENSOR_RT_HOME}/lib/tensor_runtime.bc - - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py b/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py deleted file mode 100644 index c64a9f242fcf80b585c5862ceef16b8fb8ce50a5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/mark_depthwise.py +++ /dev/null @@ -1,48 +0,0 @@ - -import sys - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -if __name__ == "__main__": - - if len(sys.argv) < 4: - print ("Usage: python mark_depthwise.py $layer_file $input_conf $output_conf") - - layer_file_name = sys.argv[1] - input_file_name = sys.argv[2] - output_file_name = sys.argv[3] - - - layer_desc = loadLayerDesc(layer_file_name) - - f_in = open(input_file_name) - f_out = open(output_file_name, "w+") - - for x in f_in: - it = 0 - confs = x.split(",") - print confs - for conf in confs: - print (" it = ", it, " layer_desc[it] = ", layer_desc[it], " \n") - if layer_desc[it][0] == "depthwise_conv": - f_out.write("9,") - else: - f_out.write(conf) - if it < len(confs) - 1: - f_out.write(",") - - it += 1 - - f_in.close() - f_out.close() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py b/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py deleted file mode 100644 index 4ca1f3f52e59498725414f37e56e06e5e74f1953..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/measure_conf_accuracy.py +++ /dev/null @@ -1,316 +0,0 @@ - -import os -import sys -import shutil -import subprocess -import shutil -import numpy as np - - - -class Benchmark: - def __init__(self): - self.binary = "" - - - -benchmarks = {} - -Alexnet1 = Benchmark() -Alexnet1.binary = "./lenet_keras_promise" -Alexnet1.accuracy = 98.7 -Alexnet1.loss1_conf = "8 8 8 8,4,4,7" -Alexnet1.loss2_conf = "8 8 8 8,3,4,7" - -benchmarks["lenet"] = Alexnet1 - - -Alexnet2 = Benchmark() -Alexnet2.binary = "./fc4_clipped_promise" -Alexnet2.accuracy = 93.72 -Alexnet2.loss1_conf = "7,7,6,7" -Alexnet2.loss2_conf = "4,4,4,5" - -benchmarks["fc4"] = Alexnet2 - - -Alexnet3 = Benchmark() -Alexnet3.binary = "./alexnet_valid" -Alexnet3.accuracy = 79.16 -Alexnet3.loss1_conf = "8 8 8 8,6,6,6,7,7" -Alexnet3.loss2_conf = "8 8 8 8,4,4,6,4,7" - -benchmarks["alexnet"] = Alexnet3 - - -Alexnet4 = Benchmark() -Alexnet4.binary = "./alexnet2_valid" -Alexnet4.accuracy = 85.09 -Alexnet4.loss1_conf = "9 9 9,7,7,7,9 9 9,7,9 9" -Alexnet4.loss2_conf = "9 9 9,7,7,6,8 8 8,6,9 9" - -benchmarks["alexnet2"] = Alexnet4 - - -Alexnet5 = Benchmark() -Alexnet5.binary = "./resnet18_valid" -Alexnet5.accuracy = 89.44 -Alexnet5.loss1_conf = "9 9 9,8 8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,7,8,8,8 8 8,8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,8 8,8,8,8 8 8,8 8,8 8,8,8,8 8 8,8 8,8,8,8 8 8,8 8,8,8,8,8 8" -Alexnet5.loss2_conf = "9 9 9,8 8 8,8 8,8,8,8 8 8,7,8,8,8 8 8,7,8,8,8 8 8,8 8,8 8,8,8,8 8 8,7,8,8,7,8 8,8,8,8 8 8,8 8,8 8,8,8,8 8 8,8 8,8,8,8 8 8,7,8,8,8,8 8" - -benchmarks["resnet"] = Alexnet5 - - - -Alexnet6 = Benchmark() -Alexnet6.binary = "./vgg16_cifar10_valid" -Alexnet6.accuracy = 89.41 -Alexnet6.loss1_conf = "9 9 9,7,7,7,9 9 9,8 8 8,7,8 8 8,7,7,8 8 8,8 8 8,7,9 9 9,9 9" -Alexnet6.loss2_conf = "9 9 9,5,5,8 8 8 8,4,6,4,7,8 8 8,4,4,4,7,8 8 8,8 8" - -benchmarks["vgg16_cifar10"] = Alexnet6 - - -Alexnet7 = Benchmark() -Alexnet7.binary = "./vgg16_cifar100_valid" -Alexnet7.accuracy = 66.19 -Alexnet7.loss1_conf = "9 9 9,8 8 8 8,8 8 8,8 8 8 8,8 8 8,7,7,7,8 8 8,8 8 8 8,7,7,8 8 8 8,8 8 8,8 8" -Alexnet7.loss2_conf = "9 9 9,8 8 8 8,8 8 8,7,8 8 8,8 8 8,8 8 8 8,6,6,7,8 8 8,7,6,8 8 8,8 8" - -benchmarks["vgg16_cifar100"] = Alexnet7 - - - -Alexnet8 = Benchmark() -Alexnet8.binary = "./pipeline_GEOM_valid" -Alexnet8.loss1_conf = "8 8,8 8 8,8 8,7" -Alexnet8.loss2_conf = "8 8,8 8 8,8 8,6" - -benchmarks["pipeline_GEOM"] = Alexnet8 - - - -Alexnet9 = Benchmark() -Alexnet9.binary = "./pipeline_GEMO_valid" -Alexnet9.loss1_conf = "8 8,8 8 8,8 8,8 8" -Alexnet9.loss2_conf = "7,8 8 8,8 8,8 8" - -benchmarks["pipeline_GEMO"] = Alexnet9 - - - -Alexnet10 = Benchmark() -Alexnet10.binary = "./pipeline_GEO_valid" -Alexnet10.loss1_conf = "8 8,8 8 8,8 8" -Alexnet10.loss2_conf = "8 8,8 8 8,8 8" - -benchmarks["pipeline_GEO"] = Alexnet10 - - - -Alexnet11 = Benchmark() -Alexnet11.binary = "./pipeline_GSM_valid" -Alexnet11.loss1_conf = "8 8,8 8,7" -Alexnet11.loss2_conf = "7,8 8,6" - -benchmarks["pipeline_GSM"] = Alexnet11 - - - -Alexnet12 = Benchmark() -Alexnet12.binary = "./pipeline_GSME_valid" -Alexnet12.loss1_conf = "8 8,8 8,8 8,8 8 8" -Alexnet12.loss2_conf = "7,8 8,8 8,8 8 8" - -benchmarks["pipeline_GSME"] = Alexnet12 - - - -def createPromiseFile(conf_flag_str): - - conf_flags = conf_flag_str.split(",") - f = open("promise_flags", "w+") - for flag_str in conf_flags: - flags = flag_str.split() - f.write(str(flags[0]) + "\n") - f.close() - - -def getRunAccuracies(): - - run_accuracies = [] - file = open("run_accuracies.txt", "r") - file_str = file.read() - - for flag in file_str.split("\n"): - print ("*** flag = ", flag) - flag = flag.strip() - if flag == "": - continue - run_accuracies.append(float(flag)) - - file.close() - - return run_accuracies - - - -def testConfidence(binary): - - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - run_accuracies = getRunAccuracies() - - return np.mean(run_accuracies), np.std(run_accuracies) - - - -def getAccuracy(): - - file = open("final_accuracy", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return 20 - - #print accuracy - return accuracy - - -def getPSNR(): - - file = open("avg_psnr", "r") - acc_str = file.read() - file.close() - accuracy = float(acc_str) - - try: - accuracy = float(acc_str) - except: - return -100 - - #print accuracy - return accuracy - - - - -def testPSNRConfidence(binary, total_runs): - - run_accuracies = [] - run_psnr = [] - for i in range(total_runs): - p = subprocess.Popen("./" + binary, shell=False) - p.wait() - acc = getAccuracy() - psnr = getPSNR() - run_accuracies.append(acc) - run_psnr.append(psnr) - - return np.mean(run_accuracies), np.std(run_accuracies), np.mean(run_psnr), np.std(run_psnr) - - - -def runBench(bench_name, dir_prefix): - - Bench = benchmarks[bench_name] - binary = Bench.binary - accuracy = Bench.accuracy - - createPromiseFile(Bench.loss1_conf) - mean, std = testConfidence(binary) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss1.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.close() - - createPromiseFile(Bench.loss2_conf) - mean, std = testConfidence(binary) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss2.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.close() - - - - - -def gen30dbFile(): - - f = open("psnr.txt", "w+"); - f.write("30"); - f.close() - - -def gen20dbFile(): - - f = open("psnr.txt", "w+"); - f.write("20"); - f.close() - - - -def runPSNRBench(bench_name, dir_prefix): - - Bench = benchmarks[bench_name] - binary = Bench.binary - - gen30dbFile() - createPromiseFile(Bench.loss1_conf) - mean, std, psnr_mean, psnr_std = testPSNRConfidence(binary, 20) - print ("mean = ", mean, " std = ", std) - - - f = open(dir_prefix + "/" + binary + "_loss30.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.write(" psnr_mean = " + str(psnr_mean) + " psnr_std = " + str(psnr_std)) - f.close() - - - gen20dbFile() - createPromiseFile(Bench.loss2_conf) - mean, std, psnr_mean, psnr_std = testPSNRConfidence(binary, 20) - print ("mean = ", mean, " std = ", std) - - f = open(dir_prefix + "/" + binary + "_loss20.txt" ,"w+") - f.write("mean = " + str(mean) + " std = " + str(std)) - f.write(" psnr_mean = " + str(psnr_mean) + " psnr_std = " + str(psnr_std)) - f.close() - - - - - - -def runDNNs(): - - #runBench("fc4", "avg_accuracies") - #runBench("lenet", "avg_accuracies") - #runBench("alexnet", "avg_accuracies") - #runBench("alexnet2", "avg_accuracies") - #runBench("resnet", "avg_accuracies") - #runBench("vgg16_cifar10", "avg_accuracies") - #runBench("vgg16_cifar100", "avg_accuracies") - - runPSNRBench("pipeline_GEOM", "avg_accuracies") - runPSNRBench("pipeline_GEMO", "avg_accuracies") - runPSNRBench("pipeline_GEO", "avg_accuracies") - runPSNRBench("pipeline_GSM", "avg_accuracies") - runPSNRBench("pipeline_GSME", "avg_accuracies") - - - - -if __name__ == "__main__": - - runDNNs() - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py b/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py deleted file mode 100644 index 74aa23c71aa3e81fc9422a3cc73ba3b69ed98c8a..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/measure_confidence.py +++ /dev/null @@ -1,125 +0,0 @@ - -import argparse -import os -import subprocess -import sys - - -def getAccuracy(file_name): - - if not os.path.exists(file_name): - print("final_accuracy file not found ") - sys.exit(0) - - file = open(file_name, "r") - acc_str = file.read() - accuracy = float(acc_str) - print accuracy - return accuracy - - -total_runs = 12.0 -skip_lines = 0 - - -def test_func(): - print "test_func" - sys.exit(0) - - -def do_multiple_runs(binary_name, accuracy_threshold, confidence_threshold): - - #total_runs = 100.0 - successful_runs = 0.0 - total_acc = 0 - - for i in range(int(total_runs)): - subprocess.call(binary_name) - accuracy = getAccuracy("final_accuracy") - total_acc += accuracy - - if accuracy > accuracy_threshold: - successful_runs += 1 - - confidence = (successful_runs / total_runs) * 100.0 - print("confidence = ", confidence) - avg_acc = total_acc / total_runs - print("average accuracy = ", avg_acc) - - return confidence, avg_acc - - -def compute_confidence(binary_name, accuracy, confidence, result_dir, output_dir): - - confidence_list = [] - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - f = open(result_dir + "/" + file_name) - tuner_file = open("opentuner_flags", "w+") - - index = 0 - results_str = "" - for x in f: - if index >= skip_lines: - error_knob = int(float(x.split()[1])) - print error_knob - tuner_file.write(str(error_knob) + "\n") - - results_str += x - index += 1 - - tuner_file.close() - - run_confidence, avg_accuracy = do_multiple_runs(binary, accuracy, confidence) - - if run_confidence > 90: - f2 = open(output_dir + "/" + file_name, "w+") - f2.write("total_runs=" + str(total_runs) + "\t confidence=" + str(run_confidence) + "\t avg_accuracy=" + str(avg_accuracy) + "\n") - f2.write(results_str) - f2.close() - - conf_result = (run_confidence, avg_accuracy, file_name) - confidence_list.append(conf_result) - - return confidence_list - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - argparser.add_argument('--output-dir', help='Directory for storing output directory') - argparser.add_argument('--binary', help='Binary name to run') - argparser.add_argument('--accuracy', type=float, help='Accuracy constraint') - argparser.add_argument('--confidence', type=float, help='Confidence threshold') - - - args = argparser.parse_args() - result_dir = args.result_dir - output_dir = args.output_dir - binary = args.binary - accuracy = args.accuracy - confidence = args.confidence - - confidence_list = compute_confidence(binary, accuracy, confidence, result_dir, output_dir) - #print confidence_list - - sorted_list = sorted(confidence_list, key = lambda tup: tup[0], reverse=True) - - output_file = open(output_dir + "/confidence_summary.txt", "w+") - for x in sorted_list: - output_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[2]) + "\n") - - output_file.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py b/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py deleted file mode 100644 index 3c9ea9de2854ed133350950d3995f459120176de..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/mergeTensorOpAndErrors.py +++ /dev/null @@ -1,60 +0,0 @@ - - - -if __name__ == "__main__": - - dnn_benchs = [] - dnn_benchs.append("fc4") - dnn_benchs.append("lenet_keras") - dnn_benchs.append("alexnet_cifar10") - dnn_benchs.append("alexnet2_cifar10") - dnn_benchs.append("vgg16_cifar10") - dnn_benchs.append("vgg16_cifar100") - dnn_benchs.append("resnet18_cifar10") - dnn_benchs.append("mobilenet") - dnn_benchs.append("mobilenet_shallow") - - - for bench in dnn_benchs: - errors_file1 = "build_tuner/tuner_results/" + bench + "/tensor_errors_1000.txt" - errors_file2 = "build_test/tuner_results/" + bench + "/tensor_composite_errors.txt" - ops_file = "build_tuner/tuner_results/" + bench + "/op_names.txt" - - f1 = open(errors_file1) - f2 = open(errors_file2) - f3 = open(ops_file) - - fout = open("build_tuner/tuner_results/" + bench + "/tensor_op_errors.txt", "w+") - - bench_data = [] - for x in f3: - op_name = x.strip() - bench_data.append([op_name, 0.0, 0.0]) - - it = 0 - for x in f1: - if it >= len(bench_data): - break - toks = x.split() - error1 = float(toks[1]) - print error1 - bench_data[it][1] = error1 - it += 1 - - it = 0 - for x in f2: - if it >= len(bench_data): - break - toks = x.split() - error2 = float(toks[1]) - bench_data[it][2] = error2 - it += 1 - - for i in range(len(bench_data)): - fout.write(str(i) + "\t" + bench_data[i][0] + "\t" + str(bench_data[i][1]) + "\t" + str(bench_data[i][2]) + "\n") - - fout.close() - f1.close() - f2.close() - f3.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py b/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py deleted file mode 100644 index c54d7dfcddc161aa20dd8378d2652d32c4905e38..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/read_weight_ranges.py +++ /dev/null @@ -1,43 +0,0 @@ - - -import numpy as np -import os -import struct - - -def read_value_range(file_name): - - print file_name - f = open(file_name, "rb") - - bytes = os.stat(file_name).st_size - elems = bytes/4 - - data_arr = struct.unpack('f'*elems, f.read(4*elems)) - - print (np.amin(data_arr)) - print (np.amax(data_arr)) - - - - -if __name__ == "__main__": - - dir_prefix = "model_params/alexnet2_cifar10/" - print dir_prefix - read_value_range(dir_prefix + "norm_cifar_input.bin") - read_value_range(dir_prefix + "conv1.bin") - read_value_range(dir_prefix + "conv1_bias.bin") - read_value_range(dir_prefix + "conv2.bin") - read_value_range(dir_prefix + "conv2_bias.bin") - read_value_range(dir_prefix + "conv3.bin") - read_value_range(dir_prefix + "conv3_bias.bin") - read_value_range(dir_prefix + "conv4.bin") - read_value_range(dir_prefix + "conv4_bias.bin") - read_value_range(dir_prefix + "conv5.bin") - read_value_range(dir_prefix + "conv5_bias.bin") - read_value_range(dir_prefix + "conv6.bin") - read_value_range(dir_prefix + "conv6_bias.bin") - read_value_range(dir_prefix + "fc1.bin") - read_value_range(dir_prefix + "fc1_bias.bin") - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py b/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py deleted file mode 100644 index b75a7d4750074cf6234151ae21a8bff5af1050d5..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/replace_half_calls.py +++ /dev/null @@ -1,35 +0,0 @@ - - -import sys - - -if __name__ == "__main__": - - if len(sys.argv) < 3: - print ("Usage: python replace_half_calls.py in_file.cc half_out_file.cc \n") - sys.exit(0) - - file_name = sys.argv[1] - out_file_name = sys.argv[2] - - f = open(file_name) - str = f.read() - - str = str.replace("tensorConvolution", "tensorHalfConvolution") - str = str.replace("tensorAdd", "tensorHalfAdd") - str = str.replace("tensorRelu", "tensorHalfRelu") - str = str.replace("tensorRelu2", "tensorHalfRelu2") - str = str.replace("tensorTanh", "tensorHalfTanh") - str = str.replace("tensorPooling", "tensorHalfPooling") - str = str.replace("tensorGemmGPU", "tensorHalfGemmGPU") - - print (str) - - f.close() - - f2 = open(out_file_name, "w+") - - f2.write(str) - - f2.close() - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py b/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py deleted file mode 100644 index 83956051bef2a868f7f685f3d471e5d5f84ac03d..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/run_dyn.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path - -name_ranges = { - "canny_pareto": list(range(11, 28 + 1)), - "blend_pareto": list(range(11, 20 + 1)) -} -iters = 20 - -def run_binary(config_prefix, binary_file, vals): - from subprocess import run - from os import rename - from shutil import copy - from tqdm import tqdm - - out_dir = Path("run_data_{}".format(binary_file)) - out_dir.mkdir() - for i in tqdm(vals): - config = (config_prefix/"{}.txt".format(binary_file)).as_posix() - copy(config, "tuner_confs.txt") - with open("slowdowns.txt", 'w') as f: - f.write('\n'.join((str(i / 10) for _ in range(iters)))) - command = "./{} >out 2>&1".format(binary_file) - tqdm.write("{}; {}".format(command, i)) - run(command, shell=True, check=True) - out_path = (out_dir/"out{}".format(i)).as_posix() - profile_path = (out_dir/"profile_info_out{}.txt".format(i)).as_posix() - rename("out", out_path) - rename("profile_info_0.txt", profile_path) - # rename("final_accuracy", out_dir/"final_accuracy{}".format(i)) - - -def main(): - from sys import argv - - config_prefix = Path(argv[1]) - for binary_file, vals in name_ranges.items(): - print(binary_file) - run_binary(config_prefix, binary_file, vals) - - -if __name__ == "__main__": - main() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py b/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py deleted file mode 100644 index 898b4c4f42211e010b1544039cbd4b4125c03b92..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/select_top_results.py +++ /dev/null @@ -1,89 +0,0 @@ - - -import argparse -import sys -import os - - -log_index = 7 -linear_index = 8 -quad_index = 9 - -top_k = 10 - -def dump_results(sorted_list, k, result_dir, sub_dir): - - ref_dir = result_dir + "/" + sub_dir - if not os.path.exists(ref_dir): - os.mkdir(ref_dir) - - for i in range(k): - file_name = sorted_list[i][1] - file_name = ref_dir + "/" + file_name + "_rank_" + str(i) - f = open(file_name, "w+") - f.write(str(sorted_list[i][2]) + "\t") - f.write(str(sorted_list[i][3]) + "\t") - f.write(str(sorted_list[i][4]) + "\n") - f.write(sorted_list[i][0]) - f.close() - - - - -def select_top_results(result_dir): - - if not os.path.exists(result_dir): - print("Path does not exist") - sys.exit(0) - - file_names = os.listdir(result_dir) - print file_names - - results_arr = [] - - for file_name in file_names: - # Skip sub-directories - if os.path.isdir(result_dir + "/" + file_name): - continue - - log_result = 0.0 - linear_result = 0.0 - quad_result = 0.0 - file_str = "" - - f = open(result_dir + "/" + file_name) - for x in f: - words = x.split() - log_result += float(words[log_index]) - linear_result += float(words[linear_index]) - quad_result += float(words[quad_index]) - file_str += x - - - file_result = (file_str, file_name, log_result, linear_result, quad_result) - results_arr.append(file_result) - - - sorted_list = sorted(results_arr, key = lambda tup: tup[2]) - dump_results(sorted_list, top_k, result_dir, "log") - - sorted_list = sorted(results_arr, key = lambda tup: tup[3]) - dump_results(sorted_list, top_k, result_dir, "linear") - - sorted_list = sorted(results_arr, key = lambda tup: tup[4]) - dump_results(sorted_list, top_k, result_dir, "quad") - - - -if __name__ == "__main__": - - argparser = argparse.ArgumentParser(description='runs best configs to get high confidence on accuracy') - argparser.add_argument('--result-dir', help='Directory containing OpenTuner configurations') - - args = argparser.parse_args() - result_dir = args.result_dir - - select_top_results(result_dir) - - - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh b/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh deleted file mode 100644 index 9f45a76033c7e82728a2bdaf0f82d2bfe9230272..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/setup_cuda_paths.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -# NOTE: Set Paths to local installation paths -# NOTE: Module cuda-took/9.1 not supported on non-EngrIT systems -module load cuda-toolkit/9.1 -export CUDA_INCLUDE_PATH=/software/cuda-9.1/include -export CUDNN_PATH=/software/cuda-9.1/lib64/ -export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH -export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH diff --git a/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py b/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py deleted file mode 100644 index b5c484a23029f97218500571ebb8bcafc718d430..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/swing_selection.py +++ /dev/null @@ -1,304 +0,0 @@ - - -import os -import warnings -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from matplotlib.ticker import MultipleLocator -import numpy as np -from scipy.signal import savgol_filter -import math -import struct - - - -def readDataFromText(textFile): - results = [] - with open(textFile, "r") as f: - for line in f: - token = line.split("\t") - if (len(token) < 7): - continue - record = (token[0], float(token[1]), float(token[5]), float(token[6])) - results.append(record) - return results - - -convL1bins = [(0.985901, 1.36474), (0.852871, 1.16982), (0.422283, 0.55701), (0.259752, 0.335259), (0.216577, 0.277843), (0.185812, 0.23733), (0.148996, 0.189171), (0.100007, 0.125816), (0.0003127876261714846, 0.014511194080114365)] -convL2bins = [(0.995298, 1.3643), (0.861066, 1.16279), (0.426857, 0.547827), (0.262645, 0.330186), (0.218984, 0.273731), (0.187878, 0.233872), (0.150619, 0.186512), (0.10106, 0.124477), (0.00035427528200671077, 0.020199092105031013)] - -biasL1bins = [(0.3510325849056244, 0.49078235030174255), (0.30895063281059265, 0.4311973750591278), (0.16023841500282288, 0.22283604741096497), (0.099583700299263, 0.1381179839372635), (0.08340170979499817, 0.11503150314092636), (0.07280077040195465, 0.09948030859231949), (0.05857400223612785, 0.07965542376041412), (0.04044099152088165, 0.054193537682294846), (0.0, 0.0)] -biasL2bins = [(0.4154910147190094, 0.5820578932762146), (0.3656001389026642, 0.5121639370918274), (0.18930286169052124, 0.2637346684932709), (0.11687946319580078, 0.16306844353675842), (0.09796475619077682, 0.13558265566825867), (0.0848352462053299, 0.11619425565004349), (0.06783176958560944, 0.09277229756116867), (0.046059850603342056, 0.062238890677690506), (0.0, 0.0)] - -gemmL1bins= [(0.711203, 0.772211), (0.625894, 0.679601), (0.322665, 0.350383), (0.199646, 0.216727), (0.166556, 0.180781), (0.142945, 0.155132), (0.114662, 0.124399), (0.0771065, 0.0835984), (0.00034660729579627514, 0.008546584285795689)] -gemmL2bins= [(0.715208, 0.768102), (0.629411, 0.675947), (0.324433, 0.348358), (0.200659, 0.21539), (0.167381, 0.179634), (0.143637, 0.154119), (0.115197, 0.123548), (0.0774642, 0.0829647), (0.0003496285935398191, 0.009841435588896275)] - - - -def findBinByOp(op): - if op == 'tensorConv': - return convL1bins, convL2bins - if op == 'tensorAdd': - return biasL1bins, biasL2bins - if op == 'tensorGemm': - return gemmL1bins, gemmL2bins - - return None, None - - -def getSwing(Lx, opLxbin): - if opLxbin == None: - return 0 - for i, (minT, maxT) in enumerate(opLxbin): - if Lx > minT: - return i - - return 9 - - - -def getConfiguration(L_thresholds): - configuration = [] - for l in L_thresholds: - # L0 is op_type - opL1bin, opL2bin = findBinByOp(l[0]) - # NOTE: L2 is L1 error, L3 is L2 error - sL1 = getSwing(l[2], opL1bin) - sL2 = getSwing(l[3], opL2bin) - if sL1 < 7: - sL1 = sL1 + 1 - if sL2 < 7: - sL2 = sL2 + 1 - configuration.append((l[0], l[1], l[2], l[3], sL1, sL2, max(sL1, sL2))) - - return configuration - - -def displayConfig(config): - for c in config: - print(c) - -def displayMultipleConfigurations(configurations): - for f, c in configurations.items(): - print(f) - displayConfig(c) - print() - -def getConfigFromFile(filename): - L_requirements = readDataFromText(filename) - config = getConfiguration(L_requirements) - return config - - -def getConfigurationsFromDir(dirname): - configurations = dict() - for f in os.listdir(dirname): - configurations[f] = getConfigFromFile(os.path.join(dirname, f)) - - return configurations - - -def getLayerWiseTarget(config): - target = [] - for i, op in enumerate(config): - if (op[0] == 'tensorGemm') or (op[0] == 'tensorConv'): - t = op[6] - for j in range(i+1, len(config)): - if config[j][0] == 'tensorGemm' or config[j][0] == 'tensorConv': - break - t = max(t, config[j][6]) - target.append(t) - t = 0 - - return target - - -def dumpLayerWiseTarget(file, targets): - with open(file, "w") as f: - for name, t in targets.items(): - f.write(name) - f.write(" ") - for i in t: - f.write(str(i)) - f.write(" ") - f.write("\n") - - -def getTargetsFromConfigurations(configs): - targets = dict() - for f, c in configs.items(): - targets[f] = [d[6] for d in c] - - return targets - - -def dumpBenchmarkTargets(name, benchmark_dir): - benchmark_targets = dict() - error = ['linear', 'log', 'quad'] - for e in error: - results_dir = os.path.join(benchmark_dir, e) - configs = getConfigurationsFromDir(results_dir) - benchmark_targets[e] = getTargetsFromConfigurations(configs) - - return benchmark_targets - - - -def dumpTargets(filename, targets): - with open(filename, "w") as f: - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - for c in config: - f.write(str(c)) - f.write(" ") - f.write("\n") - - - -def getLayerSwings(layer_desc, configurations): - - layer_swings = [] - for i in range(len(configurations)): - config_vals = configurations[i] - if len(config_vals) == 0: - continue - - layer_index = 0 - index = 0 - swing_vals = [] - - while layer_index < len(layer_desc): - if len(layer_desc[layer_index]) == 1: - promise_swing = config_vals[index] - layer_type = layer_desc[layer_index][0] - layer_type = layer_type.strip() - print ("****layer_type = ", layer_type) - if layer_type != "conv" and layer_type != "dense": - promise_swing = -9 - if layer_type == "depthwise_conv": - promise_swing = 9 - index += 1 - else: - #print ("index = ", index) - # FIXIT: Doesn't look right - print (config_vals[index], config_vals[index+1]) - promise_swing = max(config_vals[index], config_vals[index+1]) - stride = len(layer_desc[layer_index]) - index += stride - - swing_vals.append(promise_swing) - layer_index += 1 - - layer_swings.append(swing_vals) - - return layer_swings - - - - -def loadLayerDesc(layer_desc_file): - - layer_desc = [] - f = open(layer_desc_file) - for x in f: - vals = x.split() - layer_desc.append(vals) - - return layer_desc - - - -def dumpLayerTargets(targets, tuned_result_dir, layer_desc_file): - - layer_desc = loadLayerDesc(layer_desc_file) - print (layer_desc) - - file_names = [] - configurations = [] - for e, file_configs in targets.items(): - for name, config in file_configs.items(): - config_vals = [] - for c in config: - config_vals.append(c) - print (config_vals) - - configurations.append(config_vals) - - rank = e + "_" + "_".join(name.split("_")[-2:]) - file_names.append(rank) - - - # NOTE: get PROMISE swing values corresponding to each layer - layer_swings = getLayerSwings(layer_desc, configurations) - - targets_file_path = tuned_result_dir + "/layer_targets.txt" - f = open(targets_file_path, "w+") - - for config in layer_swings: - index = 0 - for swing in config: - swing_str = "" - if swing == 8 or swing == 9: - layer_size = len(layer_desc[index]) - for i in range(layer_size): - swing_str += str(swing) - if i < layer_size - 1: - swing_str += " " - elif swing == -9: - swing_str += "8" - else: - swing_str += str(swing) - - if index < len(config) - 1: - swing_str += "," - - f.write(swing_str) - index += 1 - - f.write("\n") - - f.close() - - print(layer_swings) - return layer_swings, file_names - - - -def replaceFirstLayer(layer_swings): - - # Ensuring first conv on GPU - for conf in layer_swings: - conf[0] = 9 - - - -def computeLayerTargets(tuned_result_dir, layer_desc_file): - - targets_file_path = tuned_result_dir + "/tensor_targets.txt" - targets = dumpBenchmarkTargets(targets_file_path, tuned_result_dir) - - dumpTargets(targets_file_path, targets) - - layer_swings, file_names = dumpLayerTargets(targets, tuned_result_dir, layer_desc_file) - - replaceFirstLayer(layer_swings) - - return layer_swings, file_names - - -# Externally-called function -def compute_swing_selection(tuned_result_dir, layer_file): - - return computeLayerTargets(tuned_result_dir, layer_file) - - - - -if __name__ == "__main__": - - tuned_result_dir = "./vgg16_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition.txt" - - tuned_result_dir = "./resnet18_cifar10_tuner_1/high_confidence/" - layer_file = "layer_composition2.txt" - computeLayerTargets(tuned_result_dir, layer_file) diff --git a/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh b/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh deleted file mode 100755 index f67f22ebad5352d99238addd26d9e1b568ee2125..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tensor_inline.sh +++ /dev/null @@ -1,2 +0,0 @@ -clang-4.0 -emit-llvm tensor_cpu_runtime.cc -S -o tensor_cpu_runtime.ll -opt-4.0 -always-inline tensor_cpu_runtime.ll -S -o tensor_cpu_runtime.ll diff --git a/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py b/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py deleted file mode 100644 index d0cde1e016fbbe67f9e98e43546bb3df38971f12..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/time_jetson_profiles.py +++ /dev/null @@ -1,256 +0,0 @@ - - - - - - -class Benchmark: - def __init__(self): - self.binary_path = "" - self.binary_time = 0 - self.batch_time = 0 - self.num_layers = 0 - self.data_size = 0 - self.num_classes = 0 - self.batch_size = 50 - - - -ResNet50 = Benchmark() -ResNet50.binary_path = "resnet_imagenet" -ResNet50.binary_time = 3.85 * 100 # 50 images * 100 batches -ResNet50.batch_time = 3.85 # Time for batch with 50 images -ResNet50.num_layers = 53 -ResNet50.data_size = 50 * 3 * 224 * 224 * 4 # *4 for Float32 Data -ResNet50.num_classes = 1000 -ResNet50.batch_size = 50 - - - -ResNet18 = Benchmark() -ResNet18.binary_path = "resnet18_cifar10" -#ResNet50.binary_time = 5.1 * 60 # 5.1 mins * 60 secs/min -#ResNet18.binary_time = 12.9 # 50 images * 100 batches -#ResNet18.batch_time = 12.9 / 50 # Time for batch with 50 images - -# Updated numbers based on batch_size = 50 -- NOTE: Underutilizes GPU - this can be better -ResNet18.binary_time = 78 # 50 images * 100 batches -ResNet18.batch_time = 78 / 100 # Time for batch with 50 images - -ResNet18.num_layers = 21 -ResNet18.data_size = 50 * 3 * 32 * 32 * 4 # *4 for Float32 Data -ResNet18.num_classes = 10 -ResNet18.batch_size = 50 - - - -MobileNet = Benchmark() -MobileNet.binary_path = "mobilenet_cifar10" -MobileNet.binary_time = 103.0 # 50 images * 100 batches -MobileNet.batch_time = 103.0 / 100 # Time for batch with 50 images -MobileNet.num_layers = 15 -MobileNet.data_size = 50 * 3 * 32 * 32 * 4 # *4 for Float32 Data -MobileNet.num_classes = 10 -MobileNet.batch_size = 50 - - - -VGG16_ImageNet = Benchmark() -VGG16_ImageNet.binary_path = "vgg16_imagenet" -#VGG16_ImageNet.binary_time = 10.6 * 60 # 5.1 mins * 60 secs/min -VGG16_ImageNet.binary_time = 4.55 * 100 # 50 images * 100 batches -VGG16_ImageNet.batch_time = 4.55 -VGG16_ImageNet.num_layers = 16 -VGG16_ImageNet.data_size = 50 * 3 * 224 * 224 * 4 -VGG16_ImageNet.num_classes = 1000 -VGG16_ImageNet.batch_size = 50 - - -VGG16_CIFAR10 = Benchmark() -VGG16_CIFAR10.binary_path = "vgg16_cifar10" -#VGG16_CIFAR10.binary_time = 19.0 # 50 images * 100 batches -#VGG16_CIFAR10.batch_time = 19.0 /50 - -# Updated numbers based on batch_size = 50 -- NOTE: Underutilizes GPU - this can be better -VGG16_CIFAR10.binary_time = 55.7 # 50 images * 100 batches -VGG16_CIFAR10.batch_time = 55.7 / 100 - -VGG16_CIFAR10.num_layers = 15 -VGG16_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -VGG16_CIFAR10.num_classes = 10 -VGG16_CIFAR10.batch_size = 50 - - -VGG16_CIFAR100 = Benchmark() -VGG16_CIFAR100.binary_path = "vgg16_cifar100" -VGG16_CIFAR100.binary_time = 55.7 # 50 images * 100 batches -VGG16_CIFAR100.batch_time = 55.7 / 100 -VGG16_CIFAR100.num_layers = 15 -VGG16_CIFAR100.data_size = 50 * 3 * 32 * 32 * 4 -VGG16_CIFAR100.num_classes = 100 -VGG16_CIFAR100.batch_size = 50 - - - -AlexNet_ImageNet = Benchmark() -AlexNet_ImageNet.binary_path = "alexnet_imagenet" -AlexNet_ImageNet.binary_time = 0.7 * 100 -AlexNet_ImageNet.batch_time = 0.7 -AlexNet_ImageNet.num_layers = 8 -AlexNet_ImageNet.data_size = 50 * 3 * 224 * 224 * 4 -AlexNet_ImageNet.num_classes = 1000 -AlexNet_ImageNet.batch_size = 50 - - - -AlexNet_CIFAR10 = Benchmark() -AlexNet_CIFAR10.binary_path = "alexnet_cifar10" -AlexNet_CIFAR10.binary_time = 23.52 -AlexNet_CIFAR10.batch_time = 23.52 / 100 -AlexNet_CIFAR10.num_layers = 6 -AlexNet_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -AlexNet_CIFAR10.num_classes = 10 -AlexNet_CIFAR10.batch_size = 50 - - -AlexNet2_CIFAR10 = Benchmark() -AlexNet2_CIFAR10.binary_path = "alexnet2_cifar10" -AlexNet2_CIFAR10.binary_time = 27.1 -AlexNet2_CIFAR10.batch_time = 27.1 / 100 -AlexNet2_CIFAR10.num_layers = 7 -AlexNet2_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -AlexNet2_CIFAR10.num_classes = 10 -AlexNet2_CIFAR10.batch_size = 50 - - - -LeNet_CIFAR10 = Benchmark() -LeNet_CIFAR10.binary_path = "lenet_keras" -LeNet_CIFAR10.binary_time = 2.5 -LeNet_CIFAR10.batch_time = 2.5 / 50 -LeNet_CIFAR10.num_layers = 4 -LeNet_CIFAR10.data_size = 50 * 3 * 32 * 32 * 4 -LeNet_CIFAR10.num_classes = 10 -LeNet_CIFAR10.batch_size = 50 - - - - - - -# 100 batches with batch size of 50 each -batch_count = 100 -promise_conf_runs = 30 # 30 runs for Statistical Confidence -promise_prof_runs = 10 # 10 runs for error profile collection -promise_knobs = 7 - -total_machines = 100 -total_confs = 50 -download_time_per_1MB = (6.1 * 60) / 100 # 6.1 mins over 4G LTE network for 100 MB data upload -upload_time_per_1MB = (26.4 * 60) / 100 # 26.4 mins over 4G LTE network for 100 MB data upload - - - - -def getErrorProfileTime(Bench): - - #time_per_batch = Bench.binary_time / batch_count - - time_per_batch = Bench.batch_time - - total_knobs = promise_knobs * Bench.num_layers - total_runs = total_knobs * promise_prof_runs - - promise_total_time = total_runs * time_per_batch - - fp16_total_time = Bench.num_layers * time_per_batch - - profile_time = promise_total_time + fp16_total_time - - return profile_time - - - - - -def getConfTime(Bench): - - conf_per_machine = promise_conf_runs * (total_confs * 1.0 / total_machines) - conf_time = conf_per_machine * Bench.binary_time - - return conf_time - - - - -def getNetworkTime(Bench): - - # Calibration Download Time - download_data_MB = Bench.data_size * 1.0 / 1000000 - download_data_time = download_data_MB * download_time_per_1MB - - # Profile Uploading (to Cloud Server) Time - total_knobs = (promise_knobs + 1) * Bench.num_layers - profile_size = total_knobs * Bench.batch_size * Bench.num_classes * 4 # *4 for FP32 data - - print (" ") - print ("--- profile_size = ", profile_size) - profile_size_MB = profile_size * 1.0 / 1000000 - upload_data_time = profile_size_MB * upload_time_per_1MB - - network_time = download_data_time + upload_data_time - - print( "network_time = ", download_data_time, upload_data_time, network_time) - return network_time - - - - -def getTimeOnEdge(Bench): - - err_time = getErrorProfileTime(Bench) - conf_time = getConfTime(Bench) - network_time = getNetworkTime(Bench) - - total_time = err_time + conf_time + network_time - total_time = total_time / 60 - - return total_time - - - - -if __name__ == "__main__": - - - resnet50_time = getTimeOnEdge(ResNet50) - print ("*** ResNet50 time (mins) = ", resnet50_time) - - resnet18_time = getTimeOnEdge(ResNet18) - print ("*** ResNet18 time (mins) = ", resnet18_time) - - - mobilenet_time = getTimeOnEdge(MobileNet) - print ("*** MobileNet time (mins) = ", mobilenet_time) - - - vgg16_img_time = getTimeOnEdge(VGG16_ImageNet) - print ("*** VGG16-Imagenet time (mins) = ", vgg16_img_time) - - vgg16_cifar10_time = getTimeOnEdge(VGG16_CIFAR10) - print ("*** VGG16-CIFAR10 time (mins) = ", vgg16_cifar10_time) - - vgg16_cifar100_time = getTimeOnEdge(VGG16_CIFAR100) - print ("*** VGG16-CIFAR100 time (mins) = ", vgg16_cifar100_time) - - alexnet_img_time = getTimeOnEdge(AlexNet_ImageNet) - print ("*** AlexNet-Imagenet time (mins) = ", alexnet_img_time) - - alexnet_cifar10_time = getTimeOnEdge(AlexNet_CIFAR10) - print ("*** AlexNet-CIFAR10 time (mins) = ", alexnet_cifar10_time) - - alexnet2_cifar10_time = getTimeOnEdge(AlexNet2_CIFAR10) - print ("*** AlexNet2-CIFAR10 time (mins) = ", alexnet2_cifar10_time) - - lenet_cifar10_time = getTimeOnEdge(LeNet_CIFAR10) - print ("*** LeNet-CIFAR10 time (mins) = ", lenet_cifar10_time) diff --git a/hpvm/projects/hpvm-tensor-rt/bin/times.py b/hpvm/projects/hpvm-tensor-rt/bin/times.py deleted file mode 100644 index 082b0d91acb19e70a6c217b25f8747f3197b45b7..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/times.py +++ /dev/null @@ -1,78 +0,0 @@ - - - -class Config: - def __init__(self): - self.runtime = 0 - self.fed_runs = 0 - self.full_runs = 0 - - -def computeTimes(bench): - - conf_runs = 60 - fed_time = (bench.runtime * 100) + (bench.fed_runs * conf_runs * bench.runtime) - fed_time_hrs = fed_time / (60*60) - - full_time = (bench.runtime * 1000) + (bench.full_runs * conf_runs * bench.runtime) - full_time_hrs = full_time / (60*60) - - print ("fedtime_hrs = ", fed_time_hrs, " full_time_hrs = ", full_time_hrs, "\n") - - - -if __name__ == "__main__": - - - resnet = Config() - resnet.runtime = 8 - resnet.fed_runs = 3 - resnet.full_runs = 5 - - computeTimes(resnet) - - alexnet = Config() - alexnet.runtime = 7.8 - alexnet.fed_runs = 47 - alexnet.full_runs = 274 - - computeTimes(alexnet) - - alexnet2 = Config() - alexnet2.runtime = 2.3 - alexnet2.fed_runs = 62 - alexnet2.full_runs = 339 - - computeTimes(alexnet2) - - vgg1 = Config() - vgg1.runtime = 7.4 - vgg1.fed_runs = 15 - vgg1.full_runs = 211 - - computeTimes(vgg1) - - - vgg2 = Config() - vgg2.runtime = 15.4 - vgg2.fed_runs = 8 - vgg2.full_runs = 150 - - computeTimes(vgg2) - - - lenet = Config() - lenet.runtime = 0.98 - lenet.fed_runs = 64 - lenet.full_runs = 228 - - computeTimes(lenet) - - - mobilenet = Config() - mobilenet.runtime = 11 - mobilenet.fed_runs = 32 - mobilenet.full_runs = 267 - - computeTimes(mobilenet) - diff --git a/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py b/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py deleted file mode 100644 index 6fc680973783f700ed0297279a4ab5802c15e8ab..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tuner_postprocess.py +++ /dev/null @@ -1,523 +0,0 @@ -from sys import stderr - -output_perf = list(range(21, 30 + 1)) -input_sampling = list(range(31, 36 + 1)) -red_sampling = list(range(41, 46 + 1)) -groups = { - "fp32": [11], - "fp16": [12], - "perf": output_perf, - "samp": input_sampling, - "red_samp": red_sampling -} -# 11 -> 1, 12 -> 1 -param_remap = { - 11: 1, 12: 1 -} -fp32_fp16_remap = { - 41: 42, 43: 44, 45: 46, 11: 12 -} -inv_group = {v: k for k, vs in groups.items() for v in vs} -speedups = { - 11: 1.0, - 12: 1.5, - 21: 2.25, - 22: 2.25, - 23: 1.88, - 24: 1.88, - 25: 1.88, - 26: 2.25, - 27: 2.25, - 28: 1.88, - 29: 1.88, - 30: 1.88, - 31: 2.25, - 32: 2.25, - 33: 1.8, - 34: 1.8, - 35: 1.8, - 36: 1.8, - 41: 1.5, - 42: 2.25, - 43: 1.4, - 44: 2, - 45: 1.25, - 46: 1.8 -} - - -def read_list_of_ops(template_filepath): - from re import match - from itertools import dropwhile, takewhile - with template_filepath.open() as f: - all_lines = [line.strip() for line in f.readlines()] - head_chopped = list(dropwhile( - (lambda line: match(r"\++", line) is None), all_lines))[1:] - tail_chopped = list(takewhile( - (lambda line: match(r"-+", line) is None), head_chopped)) - if not tail_chopped: - raise RuntimeError( - "Format error in file {}".format(template_filepath.as_posix()) - ) - op_lines = tail_chopped[1:] - ops = [line.split()[2] for line in op_lines] - return ops - - -def read_op_costs(filepath): - with filepath.open() as f: - return [float(line.strip()) for line in f.readlines()] - - -class Config(object): - def __init__(self, avg_acc, cost, speedup, values): - self.avg_acc, self.cost, self.speedup = avg_acc, cost, speedup - self.values = values - - @classmethod - def from_file(cls, filepath, ops): - from re import match - - with filepath.open() as f: - file_lines = f.readlines() - if not file_lines: - raise RuntimeError( - "Format error in file {}".format(filepath.as_posix())) - summary_line, config_lines = file_lines[0], file_lines[1:] - values = [int(l.strip()) for l in config_lines] - if len(values) != len(ops): - raise RuntimeError( - "Format error in file {}".format(filepath.as_posix())) - - # Summary line format: - # avg_accuracy=34.5229 config_cost=818.838299524 speedup=2.08307548754 - matched = match( - r"\s*avg_accuracy=([\d.]+)\s*config_cost=([\d.]+)\s*speedup=([\d.]+)\s*", summary_line - ) - avg_acc, cost, speedup = [float(matched.group(i)) for i in range(1, 4)] - - return cls(avg_acc, cost, speedup, values) - - @classmethod - def from_tuner_conf(cls, tuner_conf): - speedup = tuner_conf.speedup - cost = 0 - avg_acc = tuner_conf.acc - lines = [] - for _, approx_name, v in tuner_conf.lines: - replacements = groups.get(approx_name, []) - if len(replacements) == 1: - lines.append(replacements[0]) - else: - lines.append(v) - return cls(avg_acc, cost, speedup, lines) - - @staticmethod - def calculate_cost(flags, baseline_costs): - total_cost = 0 - for flag, cost in zip(flags, baseline_costs): - speedup = speedups.get(flag) - if speedup is None: - raise RuntimeError(f"Speedup of flag {flag} not given") - total_cost += cost / speedup - return total_cost - - def remap_to_fp16(self, baseline_costs): - remapped = [fp32_fp16_remap.get(v, v) for v in self.values] - if len(baseline_costs) != len(remapped): - raise RuntimeError( - "Provided baseline_costs does not map one-on-one to ops") - old_cost_match = self.calculate_cost(self.values, baseline_costs) - if abs(self.cost - old_cost_match) > 1e-2: - raise RuntimeError( - "Cost computation mismatch. Probably reading wrong costs " - "or speedup params have changed" - ) - new_cost = self.calculate_cost(remapped, baseline_costs) - speedup = (self.cost * self.speedup) / new_cost - return Config(self.avg_acc, new_cost, speedup, remapped) - - def __repr__(self): - head = f"avg_accuracy={self.avg_acc}\tconfig_cost={self.cost}\tspeedup={self.speedup}" - body = "\n".join((str(v) for v in self.values)) - return f"{head}\n{body}" - - -class TunerConf(object): - psnr_upper_bound = 200 - - def __init__(self, speedup, energy, acc, acc_loss, lines, seq_id=0): - self.speedup = speedup - self.energy = energy - self.acc = acc - self.acc_loss = acc_loss - for l in lines: - if len(l) != 3: - raise RuntimeError(f"Line {l} is malformed") - self.lines = lines - self.seq_id = seq_id - - @staticmethod - def get_baseline_conf(ops): - baseline = groups["fp32"][0] - baseline_config = Config( - avg_acc=TunerConf.psnr_upper_bound, - cost=0, # won't be used by TunerConf - speedup=1.0, - values=[baseline for _ in range(len(ops))] - ) - return TunerConf.from_config(ops, baseline_config, 0) - - @classmethod - def from_config(cls, ops, config, seq_id): - if len(ops) != len(config.values): - raise RuntimeError( - f"Number of ops mismatch in {ops} and {config.values}" - ) - lines = [] - for o, v in zip(ops, config.values): - approx_name = inv_group.get(v) - if approx_name is None: - raise RuntimeError(f"Promise flag {v} is not understood") - lines.append((o, approx_name, v)) - return cls( - speedup=config.speedup, energy=1.0, - acc=config.avg_acc, acc_loss=cls.psnr_upper_bound - config.avg_acc, - lines=lines, seq_id=seq_id - ) - - @classmethod - def many_from_file(cls, filepath): - def maybe_int(value, default=None): - try: - return int(value) - except ValueError: - return None - - import re - - with filepath.open() as f: - file_lines = f.read() - tuner_confs = [] - for match in re.finditer(r"\++\n([^-]*)\n\-+", file_lines, re.MULTILINE): - meta, *config_lines = match.group(1).split('\n') - _, *stats = meta.split(' ') - speedup, energy, acc, acc_loss = [float(s) for s in stats] - configs = [] - for line in config_lines: - _, _, op, approx, param = line.split(' ') - param = maybe_int(param, 1) - configs.append((op, approx, param)) - tuner_confs.append(cls(speedup, energy, acc, acc_loss, configs)) - return tuner_confs - - def __repr__(self): - def repr_line(idx, line): - op, approx, param = line - param = param_remap.get(param, param) - return f"{idx + 1} gpu {op} {approx} {param}\n" - - head = ( - f"+++++\nconf{self.seq_id} {self.speedup:.4f} {self.energy:.4f} " - f"{self.acc:.4f} {self.acc_loss:.4f}\n" - ) - tail = "-----" - printed_lines = "".join(( - repr_line(i, line) for i, line in enumerate(self.lines) - )) - return head + printed_lines + tail - - -def parse_config(filepath, ops, op_counter, config_summaries): - config = Config.from_file(filepath, ops) - config_summaries.append((config.speedup, config.avg_acc)) - for v, name in zip(config.values, ops): - v_group = inv_group.get(v) - op_counter[name][v_group] += 1 - - -def plot_pareto_stats(pareto, others, save_to): - import matplotlib.pyplot as plt - - if not pareto and not others: - return - p_xs, p_ys = zip(*pareto) if pareto else ([], []) - o_xs, o_ys = zip(*others) if others else ([], []) - scale = 10 - alpha = 1 - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.scatter(p_xs, p_ys, c="green", label="pareto", s=scale, alpha=alpha) - ax.scatter(o_xs, o_ys, c="red", label="non-pareto", s=scale, alpha=alpha) - ax.set_xlabel("speedup") - ax.set_ylabel("avg_psnr") - ax.legend() - fig.savefig(save_to, dpi=200) - - -def scan_config_dirs(configs_base_dir, ops): - from collections import Counter - - all_configs_dir = configs_base_dir/"high_confidence" - pareto_dir = configs_base_dir/"pareto" - if not pareto_dir.is_dir(): - print( - "No pareto directory found at {}; skipping".format( - pareto_dir.as_posix()), - file=stderr - ) - pareto_confs = set() - else: - pareto_confs = set((p.name for p in pareto_dir.iterdir())) - - counters = {name: Counter() for name in set(ops)} - pareto_summaries, other_summaries = [], [] - for filepath in all_configs_dir.iterdir(): - filename = filepath.name - if filename in pareto_confs: - filepath = pareto_dir / filename - parse_config(filepath, ops, counters, pareto_summaries) - else: - parse_config(filepath, ops, counters, other_summaries) - - return pareto_summaries, other_summaries, counters - - -def translate_configs(configs_base_dir, ops): - from pathlib import Path - - pareto_dir = configs_base_dir/"pareto" - output_file = configs_base_dir/"tuner_confs.txt" - baseline = str(TunerConf.get_baseline_conf(ops)) - tuner_conf_strs = [baseline] - for i, config_path in enumerate(pareto_dir.iterdir()): - config = Config.from_file(config_path, ops) - tuner_conf = TunerConf.from_config(ops, config, i + 1) - tuner_conf_strs.append(str(tuner_conf)) - with output_file.open('w') as f: - print("\n".join(tuner_conf_strs), file=f) - - -def print_stats(args): - from pprint import pprint - - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") - pareto, others, counters = scan_config_dirs(args.configs, ops) - if pareto: - plot_pareto_stats(pareto, others, args.configs/"pareto.png") - translate_configs(args.configs, ops) - pprint(counters) - - -def run_binary(bin_path): - import subprocess - import os - - fnull = open(os.devnull, 'wb') - p = subprocess.Popen(["./" + bin_path], stdout=fnull) - p.wait() - if p.returncode != 0: - # Something went wrong - print( - "Child program returned non-zero; you may want to stop and check.", - file=stderr - ) - - -def getPSNR(file_name): - with open(file_name) as f: - try: - raw_str = f.read() - violation, avg_psnr = [float(s) for s in raw_str.split(",")] - except: - return None, None - return 100 - violation, avg_psnr - - -def run_validation(args): - from pathlib import Path - from shutil import copy - from tqdm import tqdm - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") - binary = Path(args.binary).resolve() - dump_path = args.dump_violation - if dump_path is not None and not dump_path.is_dir(): - dump_path.mkdir() - configs = [p for p in args.configs.iterdir() if p.is_file()] - for config_path in tqdm(configs): - config = Config.from_file(config_path, ops) - promise_flags = binary.parent / "promise_flags" - with promise_flags.open('w') as f: - f.writelines((f"{v}\n" for v in config.values)) - run_binary(args.binary) - success_rate, avg_psnr = getPSNR("final_accuracy") - tqdm.write( - f"config: {config_path.as_posix()}, " - f"success_rate = {success_rate}, " - f"avg_psnr = {config.avg_acc} -> {avg_psnr}" - ) - if success_rate < args.threshold: - tqdm.write( - ( - "WARNING: config {} violates threshold on vaildation set; " - "success_rate = {}, avg_psnr = {}" - ).format(config_path, success_rate, avg_psnr), - file=stderr - ) - if dump_path is not None: - conf_name = config_path.name - copy(config_path.as_posix(), dump_path / conf_name) - - -def remap_configs(args): - ops = read_list_of_ops(args.bench_info/"tuner_conf_template.txt") - costs = read_op_costs(args.bench_info/"op_cost.txt") - output_folder = args.configs.resolve().parent / "remapped" - if not output_folder.is_dir(): - output_folder.mkdir() - for config_path in args.configs.iterdir(): - config = Config.from_file(config_path, ops) - old_speedup = config.speedup - config = config.remap_to_fp16(costs) - print(f"speedup: {old_speedup} -> {config.speedup}") - output_path = output_folder / config_path.name - with output_path.open('w') as f: - f.write(str(config)) - print( - "Finished.\n" - "Average psnr in files are not calibrated as it's impossible " - "without rerunning. Make sure to rerun the remapped configs.", - file=stderr - ) - - -def plot_compare_pareto(args): - import matplotlib.pyplot as plt - import numpy as np - - org = TunerConf.many_from_file(args.original) - cali = TunerConf.many_from_file(args.calibrated) - org, cali = org[1:], cali[1:] # remove baseline - if not org and not cali: - return - o_xs, o_ys = [tc.speedup for tc in org], [tc.acc for tc in org] - c_xs, c_ys = [tc.speedup for tc in cali], [tc.acc for tc in cali] - - scale = 10 - fig = plt.figure() - - ax1 = fig.add_subplot(211) - ax1.scatter(o_xs, o_ys, c="red", label="predicted", s=scale) - ax1.scatter(c_xs, c_ys, c="green", label="calibrated", s=scale) - ax1.set_xlabel("speedup") - ax1.set_ylabel("avg_psnr") - ax1.legend() - - ax2 = fig.add_subplot(212) - ax2.scatter(c_ys, np.array(c_xs) - np.array(o_xs), s=scale) - ax2.set_xlabel("avg_psnr") - ax2.set_ylabel("diff_speedup") - - fig.savefig(args.output.as_posix(), dpi=200) - - -def inv_translate(args): - tuner_confs = TunerConf.many_from_file(args.file)[1:] - configs = [Config.from_tuner_conf(tc) for tc in tuner_confs] - args.output_path.mkdir(exist_ok=True) - output = args.output_path/"high_confidence" - output.mkdir(exist_ok=True) - for i, conf in enumerate(configs): - with (output/f"{args.file.stem}_{i}").open('w') as f: - f.write(str(conf)) - - -def parse_args(): - import argparse - from pathlib import Path - - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers( - description="Valid subcommands", required=True, dest="subcommand" - ) - - stats_p = subparsers.add_parser( - "stats", help="Print out stats of a set of configs") - stats_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - stats_p.add_argument( - "configs", type=Path, - help="Configs folder. Should contain high_confidence (and optionally pareto) subfolders" - ) - stats_p.set_defaults(func=print_stats) - - cali_p = subparsers.add_parser( - "print_cali", help="Plot calibrated + original pareto curves") - cali_p.add_argument( - "original", type=Path, help="Original pareto curve" - ) - cali_p.add_argument( - "calibrated", type=Path, help="Calibrated pareto curve" - ) - cali_p.add_argument( - "-o", "--output", default="comparison.png", - type=Path, help="Path to output image" - ) - cali_p.set_defaults(func=plot_compare_pareto) - - ref_p = subparsers.add_parser( - "validation", help="Run validation on validation set(s)" - ) - ref_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - ref_p.add_argument("binary", type=str, help="Path to binary") - ref_p.add_argument( - "configs", type=Path, help="Path to folder of configs to run" - ) - ref_p.add_argument( - "-t", "--threshold", type=float, default=95.0, - help="Threshold of violation rate below which the test fails" - ) - ref_p.add_argument( - "-o", "--dump_violation", type=Path, help="Place to dump violating configs" - ) - ref_p.set_defaults(func=run_validation) - - remap_p = subparsers.add_parser( - "remap", help="Remap fp32 to fp16" - ) - remap_p.add_argument( - "bench_info", type=Path, - help="Benchmark settings folder containing tuner_conf_template.txt" - ) - remap_p.add_argument( - "configs", type=Path, help="Path to folder of configs to remap" - ) - remap_p.set_defaults(func=remap_configs) - - trans_p = subparsers.add_parser( - "translate", help="Translate tuner conf back to autotuner format" - ) - trans_p.add_argument( - "file", type=Path, help="Input file (one)" - ) - trans_p.add_argument( - "output_path", type=Path, help="Output folder" - ) - trans_p.set_defaults(func=inv_translate) - - return parser.parse_args() - - -def main(): - args = parse_args() - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/hpvm/projects/hpvm-tensor-rt/bin/tuner_src b/hpvm/projects/hpvm-tensor-rt/bin/tuner_src deleted file mode 120000 index f24dde48b6f885fd3783f453f514546e6e4a4ed1..0000000000000000000000000000000000000000 --- a/hpvm/projects/hpvm-tensor-rt/bin/tuner_src +++ /dev/null @@ -1 +0,0 @@ -../autotuner/tuner_driver_src/ \ No newline at end of file