diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py index 55daa05dc698361f40390b0cf1e20bef7593057d..eeca0ed8ed8ed407b9c84592b22820857678b311 100644 --- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py +++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py @@ -7,8 +7,8 @@ # Batch 12: Error Sens: 10, 25, 35, for Loss1, 2, 3, respectively, Min: P3. 1000 Runs for All # Batch 13: No Error Sens: Equal Runs (1000) for all. Min: P1 # Batch 14: Reruning Batch12 with bugFix! -# Batch 15: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3 - +# Batch 16: MAJOR CHANGE: 3 different skip levels for each Loss1,Loss2,Loss3 +# Batch 17: Baseline with 3000 runs. Compare with Batch16 class Benchmark: @@ -46,9 +46,9 @@ Alexnet1.skip_layers = 0 Alexnet1.skip_layer_str = "5_0" Alexnet1.base_dir = "../build_tuner/tuner_results/alexnet_cifar10/" -Alexnet1.result_dir_1 = "../build_tuner/tuner_results/alexnet_cifar10/loss_1/batch15" -Alexnet1.result_dir_2 = "../build_tuner/tuner_results/alexnet_cifar10/loss_2/batch15" -Alexnet1.result_dir_3 = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15" +Alexnet1.result_dir_1 = "../build_tuner/tuner_results/alexnet_cifar10/loss_1/batch17" +Alexnet1.result_dir_2 = "../build_tuner/tuner_results/alexnet_cifar10/loss_2/batch17" +Alexnet1.result_dir_3 = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch17" Alexnet1.tensor_desc_file = "tuner_results/alexnet_cifar10/alexnet_tensors.txt" Alexnet1.layer_file = "tuner_results/alexnet_cifar10/alexnet_layers.txt" @@ -79,9 +79,9 @@ Alexnet2.start_promise_range = 1 Alexnet2.skip_layer_str = "6_1_0" Alexnet2.base_dir = "../build_tuner/tuner_results/alexnet2_cifar10/" -Alexnet2.result_dir_1 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_1/batch15" -Alexnet2.result_dir_2 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_2/batch15" -Alexnet2.result_dir_3 = 
"../build_tuner/tuner_results/alexnet2_cifar10/loss_3/batch15" +Alexnet2.result_dir_1 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_1/batch17" +Alexnet2.result_dir_2 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_2/batch17" +Alexnet2.result_dir_3 = "../build_tuner/tuner_results/alexnet2_cifar10/loss_3/batch17" Alexnet2.tensor_desc_file = "tuner_results/alexnet2_cifar10/alexnet2_tensors.txt" Alexnet2.layer_file = "tuner_results/alexnet2_cifar10/alexnet2_layers.txt" Alexnet2.cost_file = "../build_tuner/tuner_results/alexnet2_cifar10/op_cost.txt" @@ -109,9 +109,9 @@ Alexnet3.start_promise_range = 1 Alexnet3.skip_layer_str = "14_3_4_1_6" Alexnet3.base_dir = "../build_tuner/tuner_results/vgg16_cifar10/" -Alexnet3.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar10/loss_1/batch15" -Alexnet3.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar10/loss_2/batch15" -Alexnet3.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar10/loss_3/batch15" +Alexnet3.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar10/loss_1/batch17" +Alexnet3.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar10/loss_2/batch17" +Alexnet3.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar10/loss_3/batch17" Alexnet3.tensor_desc_file = "tuner_results/vgg16_cifar10/vgg16_tensors.txt" Alexnet3.layer_file = "tuner_results/vgg16_cifar10/vgg16_layers.txt" @@ -141,11 +141,11 @@ Alexnet4.start_promise_range = 1 #Alexnet4.skip_layer_str = "0" Alexnet4.skip_layer_str = "0_1_2_14_15_17_18_21" Alexnet4.base_dir = "../build_tuner/tuner_results/resnet18_cifar10/" -Alexnet4.result_dir_1 = "../build_tuner/tuner_results/resnet18_cifar10/loss_1/batch15" -Alexnet4.result_dir_2 = "../build_tuner/tuner_results/resnet18_cifar10/loss_2/batch15" -Alexnet4.result_dir_3 = "../build_tuner/tuner_results/resnet18_cifar10/loss_3/batch15" +Alexnet4.result_dir_1 = "../build_tuner/tuner_results/resnet18_cifar10/loss_1/batch17" +Alexnet4.result_dir_2 = 
"../build_tuner/tuner_results/resnet18_cifar10/loss_2/batch17" +Alexnet4.result_dir_3 = "../build_tuner/tuner_results/resnet18_cifar10/loss_3/batch17" Alexnet4.tensor_desc_file = "tuner_results/resnet18_cifar10/resnet_tensors.txt" -Alexnet4.layer_file = "tuner_results/resnet18_cifar10/resnet_layers.txt" +Alexnet4.layer_file = "tuner_results/resnet18_cifar10/resnet18_layers.txt" Alexnet4.cost_file = "../build_tuner/tuner_results/resnet18_cifar10/op_cost.txt" Alexnet4.loss1_result_file = "tuner_results/resnet18_cifar10/loss_1/promise_tuned_confs/promise_confs.txt" @@ -174,9 +174,9 @@ Alexnet5.start_promise_range = 1 #Alexnet5.skip_layer_str = "0" Alexnet5.skip_layer_str = "0_1_2_3_4" Alexnet5.base_dir = "../build_tuner/tuner_results/vgg16_cifar100/" -Alexnet5.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar100/loss_1/batch15" -Alexnet5.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar100/loss_2/batch15" -Alexnet5.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar100/loss_3/batch15" +Alexnet5.result_dir_1 = "../build_tuner/tuner_results/vgg16_cifar100/loss_1/batch17" +Alexnet5.result_dir_2 = "../build_tuner/tuner_results/vgg16_cifar100/loss_2/batch17" +Alexnet5.result_dir_3 = "../build_tuner/tuner_results/vgg16_cifar100/loss_3/batch17" Alexnet5.tensor_desc_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_tensors.txt" Alexnet5.layer_file = "../build_tuner/tuner_results/vgg16_cifar100/vgg16_layers.txt" @@ -206,9 +206,9 @@ Alexnet6.start_promise_range = 1 Alexnet6.skip_layer_str = "0" Alexnet6.base_dir = "../build_tuner/tuner_results/lenet_keras/" -Alexnet6.result_dir_1 = "../build_tuner/tuner_results/lenet_keras/loss_1/batch15" -Alexnet6.result_dir_2 = "../build_tuner/tuner_results/lenet_keras/loss_2/batch15" -Alexnet6.result_dir_3 = "../build_tuner/tuner_results/lenet_keras/loss_3/batch15" +Alexnet6.result_dir_1 = "../build_tuner/tuner_results/lenet_keras/loss_1/batch17" +Alexnet6.result_dir_2 = 
"../build_tuner/tuner_results/lenet_keras/loss_2/batch17" +Alexnet6.result_dir_3 = "../build_tuner/tuner_results/lenet_keras/loss_3/batch17" Alexnet6.tensor_desc_file = "tuner_results/lenet_keras/lenet_tensors.txt" Alexnet6.layer_file = "tuner_results/lenet_keras/lenet_layers.txt" @@ -239,9 +239,9 @@ Alexnet7.start_promise_range = 1 #Alexnet7.skip_layer_str = "0" Alexnet7.skip_layer_str = "1_14_0_6_2" Alexnet7.base_dir = "../build_tuner/tuner_results/mobilenet/" -Alexnet7.result_dir_1 = "../build_tuner/tuner_results/mobilenet/loss_1/batch15" -Alexnet7.result_dir_2 = "../build_tuner/tuner_results/mobilenet/loss_2/batch15" -Alexnet7.result_dir_3 = "../build_tuner/tuner_results/mobilenet/loss_3/batch15" +Alexnet7.result_dir_1 = "../build_tuner/tuner_results/mobilenet/loss_1/batch17" +Alexnet7.result_dir_2 = "../build_tuner/tuner_results/mobilenet/loss_2/batch17" +Alexnet7.result_dir_3 = "../build_tuner/tuner_results/mobilenet/loss_3/batch17" Alexnet7.tensor_desc_file = "tuner_results/mobilenet/mobilenet_ops.txt" Alexnet7.layer_file = "tuner_results/mobilenet/mobilenet_layer_comp.txt" @@ -271,9 +271,9 @@ Alexnet8.start_promise_range = 1 #Alexnet8.skip_layer_str = "0" Alexnet8.skip_layer_str = "7_0_1" Alexnet8.base_dir = "../build_tuner/tuner_results/mobilenet_shallow/" -Alexnet8.result_dir_1 = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/batch15" -Alexnet8.result_dir_2 = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/batch15" -Alexnet8.result_dir_3 = "../build_tuner/tuner_results/mobilenet_shallow/loss_3/batch15" +Alexnet8.result_dir_1 = "../build_tuner/tuner_results/mobilenet_shallow/loss_1/batch17" +Alexnet8.result_dir_2 = "../build_tuner/tuner_results/mobilenet_shallow/loss_2/batch17" +Alexnet8.result_dir_3 = "../build_tuner/tuner_results/mobilenet_shallow/loss_3/batch17" Alexnet8.tensor_desc_file = "../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_ops.txt" Alexnet8.layer_file = 
"../build_tuner/tuner_results/mobilenet_shallow/mobilenet_shallow_layer_comp.txt" diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py index 0b59ff187d6387ac0616a3e8854029fbafec9b98..2d4a3bb9ca0189e7889abeca2888f985d1bbe380 100644 --- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py +++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py @@ -52,42 +52,44 @@ def computeLayerSwings(): - +gpu = 1 def runPromiseTuner(): - start = startProfile("LeNet") - runPromiseBench(bench_tuner_data["lenet_keras"]) - stopProfile("LeNet", start) + if gpu == 2: + start = startProfile("LeNet") + runPromiseBench(bench_tuner_data["lenet_keras"]) + stopProfile("LeNet", start) - start = startProfile("Alexnet") - runPromiseBench(bench_tuner_data["alexnet_cifar10"]) - stopProfile("Alexnet", start) - - start = startProfile("Alexnet2") - runPromiseBench(bench_tuner_data["alexnet2_cifar10"]) - stopProfile("Alexnet2", start) - - start = startProfile("VGG16_10") - runPromiseBench(bench_tuner_data["vgg16_cifar10"]) - stopProfile("VGG16_10", start) + start = startProfile("Alexnet") + runPromiseBench(bench_tuner_data["alexnet_cifar10"]) + stopProfile("Alexnet", start) - start = startProfile("VGG16_100") - runPromiseBench(bench_tuner_data["vgg16_cifar100"]) - stopProfile("VGG16_100", start) + start = startProfile("Alexnet2") + runPromiseBench(bench_tuner_data["alexnet2_cifar10"]) + stopProfile("Alexnet2", start) - start = startProfile("ResNet") - runPromiseBench(bench_tuner_data["resnet18_cifar10"]) - stopProfile("ResNet", start) + start = startProfile("ResNet") + runPromiseBench(bench_tuner_data["resnet18_cifar10"]) + stopProfile("ResNet", start) - start = startProfile("MobileNet") - runPromiseBench(bench_tuner_data["mobilenet_cifar10"]) - stopProfile("MobileNet", start) + if gpu == 1: + + start = startProfile("VGG16_10") + runPromiseBench(bench_tuner_data["vgg16_cifar10"]) + 
stopProfile("VGG16_10", start) + + start = startProfile("VGG16_100") + runPromiseBench(bench_tuner_data["vgg16_cifar100"]) + stopProfile("VGG16_100", start) - start = startProfile("MobileNet-SH") - runPromiseBench(bench_tuner_data["mobilenet_shallow"]) - stopProfile("MobileNet-SH", start) + start = startProfile("MobileNet") + runPromiseBench(bench_tuner_data["mobilenet_cifar10"]) + stopProfile("MobileNet", start) + start = startProfile("MobileNet-SH") + runPromiseBench(bench_tuner_data["mobilenet_shallow"]) + stopProfile("MobileNet-SH", start) #runPSNRPromiseBench("pipeline_GEOM") #runPSNRPromiseBench("pipeline_GEMO") @@ -95,7 +97,7 @@ def runPromiseTuner(): #runPSNRPromiseBench("pipeline_GSM") #runPSNRPromiseBench("pipeline_GSME") - dumpProfiles("time_profile_11.txt") + dumpProfiles("time_profile_17.txt") def runPromiseValidation(): @@ -134,6 +136,7 @@ def runAutotuner(): def runSensAnalysis(): + """ start = startProfile("LeNet") test_sensitivity3(bench_tuner_data["lenet_keras"]) stopProfile("LeNet", start) @@ -145,7 +148,8 @@ def runSensAnalysis(): start = startProfile("AlexNet2") test_sensitivity3(bench_tuner_data["alexnet2_cifar10"]) stopProfile("AlexNet2", start) - + """ + start = startProfile("ResNet") test_sensitivity3(bench_tuner_data["resnet18_cifar10"]) stopProfile("ResNet", start) @@ -159,6 +163,7 @@ def runSensAnalysis(): test_sensitivity3(bench_tuner_data["mobilenet_shallow"]) stopProfile("MobileNet_SH", start) + """ start = startProfile("VGG_10") test_sensitivity3(bench_tuner_data["vgg16_cifar10"]) stopProfile("VGG16_10", start) @@ -166,7 +171,9 @@ def runSensAnalysis(): start = startProfile("VGG_100") test_sensitivity3(bench_tuner_data["vgg16_cifar100"]) stopProfile("VGG16_100", start) - + + """ + dumpProfiles("sens_time_prof.txt") @@ -206,9 +213,9 @@ if __name__ == "__main__": #computeLayerSwings() - #runPromiseTuner() + runPromiseTuner() - runPromiseValidation() + #runPromiseValidation() #runSensAnalysis() diff --git 
a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py index ae06613aaf1202d7a901e3365f25906e084bc959..f1a9c8f417bafdf4084a687670074101bec3faa0 100644 --- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py +++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_hs_tuner.py @@ -7,7 +7,7 @@ from error_sensitivity import select_skip_layers def runPromiseTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_runs, skip_layers): - tuner_cmd = "python ../opentuner/autotuner/promise_tuner3.py " + tuner_cmd = "python2 ../opentuner/autotuner/promise_tuner3.py " tuner_cmd += " --test-limit " tuner_cmd += str(autotuner_runs) tuner_cmd += " --binary ./" @@ -48,7 +48,7 @@ def runPromiseTunerCmd(Bench, dir_prefix, result_dir, acc_threshold, autotuner_r def promiseTunerLoss1(Bench, dir_prefix): - tuner_runs = int(Bench.autotuner_runs / 3) + tuner_runs = Bench.autotuner_runs skip_layers1 = "0" skip_layers2 = "0_" + select_skip_layers(Bench, 30) @@ -61,7 +61,7 @@ def promiseTunerLoss1(Bench, dir_prefix): def promiseTunerLoss2(Bench, dir_prefix): - tuner_runs = int(Bench.autotuner_runs / 3) + tuner_runs = Bench.autotuner_runs skip_layers1 = "0" skip_layers2 = "0_" + select_skip_layers(Bench, 20) @@ -75,7 +75,7 @@ def promiseTunerLoss2(Bench, dir_prefix): def promiseTunerLoss3(Bench, dir_prefix): - tuner_runs = int(Bench.autotuner_runs / 3) + tuner_runs = Bench.autotuner_runs skip_layers1 = "0" skip_layers2 = "0_" + select_skip_layers(Bench, 10) @@ -86,6 +86,7 @@ def promiseTunerLoss3(Bench, dir_prefix): runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers3) +BASELINE = True def runPromiseBench(Bench): @@ -93,12 +94,21 @@ def runPromiseBench(Bench): # NOTE-IMP: Changing current directory to one with promise binaries dir_prefix = "../build_tuner/" - - promiseTunerLoss1(Bench, dir_prefix) - promiseTunerLoss2(Bench, dir_prefix) + if BASELINE: + tuner_runs = 
Bench.autotuner_runs * 2 + skip_layers = "0" + runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_1, 0.85, tuner_runs, skip_layers) + runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_2, 1.7, tuner_runs, skip_layers) + runPromiseTunerCmd(Bench, dir_prefix, Bench.result_dir_3, 2.5, tuner_runs, skip_layers) + + else: + + promiseTunerLoss1(Bench, dir_prefix) + + promiseTunerLoss2(Bench, dir_prefix) - promiseTunerLoss3(Bench, dir_prefix) + promiseTunerLoss3(Bench, dir_prefix) diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py index 80e67a1bc372e6628404e9852c9f7809cbfd73be..b38efa9c82a1da4440fe4653b72b1beb89032a5f 100644 --- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py +++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/measure_confidence2.py @@ -19,8 +19,8 @@ def getAccuracy(file_name): return accuracy -total_runs = 60.0 -fails_allowed = 4 +total_runs = 40.0 +fails_allowed = 3 skip_lines = 0 @@ -297,6 +297,8 @@ def getConfigCost(layer_costs, config_str): continue orig_cost += layer_costs[it] + + #print ("orig_cost = ", orig_cost, " flag_value = ", flag_value) if flag_value == 11: total_cost += layer_costs[it] @@ -307,10 +309,12 @@ def getConfigCost(layer_costs, config_str): elif flag_value < 8: divisor = 5 + (7 - flag_value) total_cost += (layer_costs[it] / divisor) - + it += 1 - - return total_cost, (orig_cost / total_cost) + + speedup = orig_cost * 1.0 / total_cost * 1.0 + + return total_cost, speedup @@ -406,6 +410,7 @@ def dump_promise_confidence_files(binary, result_dir, layer_file_path, def dump_promise_confidence_files2(binary, result_dir, layer_file_path, num_flags, accuracy, layer_costs, confidence): + #result_dir = args.result_dir output_dir = result_dir + "/high_confidence" input_dir = result_dir + "/full_results" @@ -445,6 +450,49 @@ def dump_promise_confidence_files2(binary, result_dir, layer_file_path, + +def 
dump_promise_confidence_files3(binary, input_dir, output_dir, layer_file_path, + num_flags, accuracy, layer_costs, confidence): + + + #result_dir = args.result_dir + #output_dir = result_dir + "/high_confidence" + #input_dir = result_dir + "/full_results" + + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + layer_sizes = processLayerFile(layer_file_path); + print layer_sizes + sleep(2) + + confidence_list = compute_promise_confidence2(binary, accuracy, confidence, layer_costs, input_dir, output_dir) + print confidence_list + + # Ascending sort on accuracy + sorted_list = sorted(confidence_list, key = lambda tup: tup[1]) + + promise_file = open(output_dir + "/promise_confs.txt", "w+") + confidence_file = open(output_dir + "/confidence_summary.txt", "w+") + + max_configs = 50 + it_count = 0 + for x in sorted_list: + if x[1] > accuracy and x[0] > confidence: + config_str = getLayerConfigStr(x[3], layer_sizes, num_flags) + promise_file.write(config_str + "\n") + it_count += 1 + if it_count > max_configs: + break + + confidence_file.write(str(x[0]) + "\t" + str(x[1]) + "\t" + str(x[3]) + "\n") + + promise_file.close() + confidence_file.close() + + print "Dumped Confidence Summary" + + diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py new file mode 100644 index 0000000000000000000000000000000000000000..0fda8f742cc0ef75e4b84232f397872b04554dd6 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/pareto_curve.py @@ -0,0 +1,259 @@ + + +import os +import shutil +from measure_confidence2 import getConfigCost + + +class Config: + def __init__(self): + self.avg_accuracy = 0 + self.avg_loss = 0 + self.speedup = 1 + self.fname = "" + self.flags = [] + + + + +def skipFile(fname): + + skip_files = {} + skip_files["confidence_summary.txt"] = 1 + skip_files["promise_confs.txt"] = 1 + + if "accuracy" in fname: + return True + + if fname in skip_files: + return 
True + else: + return False + + + + +def loadConfigData(result_dir, layer_costs, baseline_accuracy): + + config_arr = [] + + #result_dir += "/promise_tuner/high_confidence/" + file_names = os.listdir(result_dir) + + + for fname in file_names: + if not skipFile(fname): + + fpath = result_dir + fname + config = Config() + f = open(fpath, "r") + + config_str = f.read() + cost, speedup = getConfigCost(layer_costs, config_str) + + config.speedup = speedup + config.fname = fname + + fpath2 = fpath + "_accuracy" + f2 = open(fpath2, "r") + acc_str = f2.read().strip() + accuracy = float(acc_str) + + config.avg_accuracy = accuracy + config.avg_loss = baseline_accuracy - accuracy + + config_arr.append(config) + + + return config_arr + + + +AL_THRESHOLD = 0.1 +SPEEDUP_BAND_SIZE = 0.3 +ENERGY_BAND_SIZE = 10 + + +class Configuration: + def __init__(self, name, speedup, energy, accuracy, accuracy_loss): + self.name = name + self.speedup = speedup + self.energy = energy + self.accuracy = accuracy + self.accuracy_loss = accuracy_loss + def __repr__(self): + return repr((self.name, self.speedup, self.energy, self.accuracy, self.accuracy_loss)) + +configuration_objects = [ + Configuration('conf1', 1.05, 15, 85, 1.2), + Configuration('conf2', 2.51, 12, 83, 1.4), + Configuration('conf3', 2.05, 10, 84, 0.8), +] + +def compute_pareto_points(configurations): + speedupconfigurations = [] + energyconfigurations = [] + #sort configurations based on speedup + sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) + + start_idx = 0 + while start_idx < len(sorted_configurations): + end_idx = start_idx + 1; + # find end_idx + while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : + end_idx += 1 + # find best speedup end energy in this accuracy loss level + sp = -1.0 + sp_idx = 0 + en = -1.0 + en_idx = 0 + for i in range(start_idx, end_idx): + if 
sorted_configurations[i].speedup > sp: + sp = sorted_configurations[i].speedup + sp_idx = i + if sorted_configurations[i].energy > en: + en = sorted_configurations[i].energy + en_idx = i + sp_not_dominated = True + # if not empty list of configurations + if speedupconfigurations: + if speedupconfigurations[-1].speedup >= sp: + sp_not_dominated = False + en_not_dominated = True + # if not empty list of configurations + if energyconfigurations: + if energyconfigurations[-1].energy >= en: + en_not_dominated = False + if sp_not_dominated: + speedupconfigurations.append(sorted_configurations[sp_idx]) + if en_not_dominated: + energyconfigurations.append(sorted_configurations[en_idx]) + # outer while loop variable increment + start_idx = end_idx + return [speedupconfigurations, energyconfigurations] + + +def compute_pareto_points_with_margin(configurations, speedup_band_width, energy_band_width): + speedupconfigurations = [] + energyconfigurations = [] + #sort configurations based on speedup + sorted_configurations = sorted(configurations, key=lambda conf: conf.accuracy_loss) + + idx_to_sp_conf_dict = {} + idx_to_en_conf_dict = {} + + start_idx = 0 + while start_idx < len(sorted_configurations): + end_idx = start_idx + 1; + # find end_idx + while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : + end_idx += 1 + # find best speedup end energy in this accuracy loss level + sp = -1.0 + sp_idx = 0 + en = -1.0 + en_idx = 0 + for i in range(start_idx, end_idx): + if sorted_configurations[i].speedup > sp: + sp = sorted_configurations[i].speedup + sp_idx = i + if sorted_configurations[i].energy < en: + en = sorted_configurations[i].energy + en_idx = i + sp_not_dominated = True + # if not empty list of configurations + if speedupconfigurations: + if speedupconfigurations[-1].speedup >= sp: + sp_not_dominated = False + en_not_dominated = True + # if not empty list of 
configurations + if energyconfigurations: + if energyconfigurations[-1].energy >= en: + en_not_dominated = False + if sp_not_dominated: + speedupconfigurations.append(sorted_configurations[sp_idx]) + idx_to_sp_conf_dict[start_idx] = len(speedupconfigurations)-1 + if en_not_dominated: + energyconfigurations.append(sorted_configurations[en_idx]) + idx_to_en_conf_dict[start_idx] = len(energyconfigurations)-1 + # outer while loop variable increment + start_idx = end_idx + + # We want to add configurations in a band of a certain width around the curves + # not possible to do during contruction, because the quality of the curve would + # deteriorate quickly + + AdjustedSpeedupCurve = [] + AdjustedEnergyCurve = [] + + start_idx = 0 + while start_idx < len(sorted_configurations): + end_idx = start_idx + 1; + # find end_idx + while end_idx < len(sorted_configurations) and (sorted_configurations[end_idx].accuracy_loss - sorted_configurations[start_idx].accuracy_loss < AL_THRESHOLD) : + end_idx += 1 + for i in range(start_idx, end_idx): + if sorted_configurations[i].speedup + speedup_band_width >= speedupconfigurations[idx_to_sp_conf_dict[start_idx]].speedup: + AdjustedSpeedupCurve.append(sorted_configurations[i]) + if sorted_configurations[i].energy + energy_band_width >= energyconfigurations[idx_to_en_conf_dict[start_idx]].energy: + AdjustedEnergyCurve.append(sorted_configurations[i]) + # outer while loop variable increment + start_idx = end_idx + + return [AdjustedSpeedupCurve, AdjustedEnergyCurve] + + + +def findParetoConfigs(base_dir, layer_costs, accuracy): + + result_dir = base_dir + "/pareto/" + try: + os.mkdir(result_dir) + except: + print "could not create dir" + + input_dir = base_dir + "/full_results/" + #result_dir = "../build_tuner/tuner_results/alexnet_cifar10/loss_3/batch15" + config_arr = loadConfigData(input_dir, layer_costs, accuracy) + + config_list = [] + + it = 0 + for config in config_arr: + config = Configuration(config.fname , config.speedup, 100, 
config.avg_accuracy, config.avg_loss) + config_list.append(config) + + + if len(config_list) < 30: + SPEEDUP_BAND_SIZE = 1.2 + + + ASC, AEC = compute_pareto_points_with_margin(config_list, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) + + + print ("len(config_list) = ", len(config_list)) + print ("len(ASC) = ", len(ASC)) + + #print (ASC) + #print (config_list) + + for conf in ASC: + #dst_path = conf.name.replace("full_results", "pareto") + src_path = base_dir + "/full_results/" + conf.name + dst_path = base_dir + "/pareto/" + conf.name + shutil.copy(src_path, dst_path) + + + +if __name__ == "__main__": + + get_pareto_configs("") + + #SC, EC = compute_pareto_points(configuration_objects) + #ASC, AEC = compute_pareto_points_with_margin(configuration_objects, SPEEDUP_BAND_SIZE, ENERGY_BAND_SIZE) + + #print(SC) + #print(EC) + + #print(ASC) + #print(AEC) diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py index 07a5bf0bcf4b9135a746f0dd733daa2699d7ad58..87ed35bbc4bcac6288c30454ba1d650956dd9118 100644 --- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py +++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/promise_tuner3.py @@ -21,9 +21,10 @@ import subprocess import threading import psutil -from measure_confidence2 import dump_promise_confidence_files2 +from measure_confidence2 import dump_promise_confidence_files3 from select_top_results import select_top_results from time import sleep +from pareto_curve import findParetoConfigs layer_file = "" @@ -48,6 +49,7 @@ def readCostFile(file_path): cost = float(x.strip()) layer_costs.append(cost) + print ("len(layer_costs) = ", layer_costs) f.close() @@ -192,7 +194,11 @@ class ClangFlagsTuner(MeasurementInterface): evaluated_configs[accuracy] = 1 shutil.copy('promise_flags', output_dir + '/' + binary_name + '_' + str(test_id)) - + f_acc = open(output_dir + '/' + binary_name + '_' + str(test_id) + "_accuracy", "w") + 
f_acc.write(str(accuracy)) + f_acc.close() + + print "done with one run" test_id += 1 @@ -203,12 +209,17 @@ class ClangFlagsTuner(MeasurementInterface): def save_final_config(self, configuration): print "Dumping High Confidence results \n" - sleep(20) + sleep(2) + + + findParetoConfigs(orig_result_dir, layer_costs, accuracy_threshold) + + input_dir = orig_result_dir + "/pareto/" + output_dir = orig_result_dir + "/high_confidence/" # Only dumping files with 95% confidence - dump_promise_confidence_files2(binary_name, orig_result_dir, layer_file, num_flags, accuracy_threshold, layer_costs, 95) + dump_promise_confidence_files3(binary_name, input_dir, output_dir, layer_file, num_flags, accuracy_threshold, layer_costs, 95) #select_top_results(orig_result_dir + "/high_confidence") - """ diff --git a/llvm/projects/soc_simulator/src/driver.py b/llvm/projects/soc_simulator/src/driver.py index b685170da40c17dc45b7258cbbe2166ac52736fc..1df46eec8fc34cee7c6a7683d1faaae4a94639ca 100644 --- a/llvm/projects/soc_simulator/src/driver.py +++ b/llvm/projects/soc_simulator/src/driver.py @@ -3,291 +3,297 @@ import os import subprocess import sys -def build_nested_default_dict(): - return defaultdict(build_nested_default_dict) +class Driver: + fp16_swing = 8 -def is_conv(operation_name): - return operation_name.startswith("Conv") + class ApproxTypes: + FP16 = 0 + FP32 = 1 + PROMISE = 2 -def is_nml(operation_name): - return operation_name.startswith("NML") + results_time_key = "Time" + results_energy_key = "Energy" -def is_fc(operation_name): - return operation_name.startswith("FC") -# NOTE: Use an OrderedDict if we want to search by operation name -# Using a list bc we care about the order the data is read in -# since it corresponds to the data in the configuration file -tensor_layers = [] -def parse_tensor_layer_file(layer_filename): - if not os.path.isfile(layer_filename): - print("ERROR: %s was not found." 
% layer_filename) - exit(1) + def driver(self): + self.__parse_tensor_layer_file() + self.__parse_tensor_table() + self.__run_simulations() + self.__display_results() - layer_file = open(layer_filename, "r") - for line in layer_file: - layer_data = line.strip().split(',') - layer_name = layer_data[0] - - tensor_layer = defaultdict(str) - tensor_layer["Name"] = layer_name - - if is_conv(layer_name): - tensor_layer["N"] = float(layer_data[1]) - tensor_layer["Cin"] = float(layer_data[2]) - tensor_layer["H"] = float(layer_data[3]) - tensor_layer["W"] = float(layer_data[4]) - tensor_layer["Cout"] = float(layer_data[5]) - tensor_layer["Kh"] = float(layer_data[7]) - tensor_layer["Kw"] = float(layer_data[8]) - tensor_layer["Sh"] = float(layer_data[9]) - tensor_layer["Sw"] = float(layer_data[10]) - - elif is_fc(layer_name): - tensor_layer["RA"] = float(layer_data[1]) - tensor_layer["CA"] = float(layer_data[2]) - tensor_layer["RB"] = float(layer_data[3]) - tensor_layer["CB"] = float(layer_data[4]) - - elif not is_nml(layer_name): # TODO should we store data for NMLs? - print("ERROR: Invalid layer name %s" % layer_name) - exit(1) - - tensor_layers.append(tensor_layer) - layer_file.close() - -# [layer_name][operation_name][cols] -# Operation names need to be stored in order of insertion -tensor_table = defaultdict(lambda: list(defaultdict(str))) - -def parse_tensor_table(table_filename): - if not os.path.isfile(table_filename): - print("ERROR: %s was not found." 
% table_filename) - exit(1) - table_file = open(table_filename, "r") - line = table_file.readline().strip() - while line: - # Line here MUST be a header or there's a bug - # Get the description of the layer - assert(line.startswith("**")) + def __init__(self, layer_filename, table_filename, config_filename, results_filename): + self.__layer_filename = layer_filename + self.__table_filename = table_filename + self.__config_filename = config_filename + self.__results_filename = results_filename - header_contents = line.split(' ')[1:] - layer_name = header_contents[0] - num_ops = int(header_contents[1]) - col_names = header_contents[2:] + # NOTE: Use an OrderedDict if we want to search by operation name + # Using a list bc we care about the order the data is read in + # since it corresponds to the data in the configuration file + self.__tensor_layers = [] - layer_operations = [] + # [layer_name][operation_name][cols] + # Operation names need to be stored in order of insertion + self.__tensor_table = defaultdict(lambda: list(defaultdict(str))) - # Go through all operations in the layer - for op_count in range(num_ops): - operation_data = defaultdict(str) + # [Time/Energy][number corresponding to order the layer config was read in] = time/energy + self.__aggregate_results = defaultdict(lambda: defaultdict(float)) + self.__config_count = 0 - line = table_file.readline().strip() - op_data = line.split(' ') - op_name = op_data[0] - operation_data["Name"] = op_name - # Number of data items (#s) needs to match up with the # of cols - assert(len(op_data) - 1 == len(col_names)) + @staticmethod + def is_conv(operation_name): + return operation_name.startswith("Conv") + + + @staticmethod + def is_nml(operation_name): + return operation_name.startswith("NML") + + + @staticmethod + def is_fc(operation_name): + return operation_name.startswith("FC") + + + def __parse_tensor_layer_file(self): + if not os.path.isfile(self.__layer_filename): + print("ERROR: %s was not found." 
% self.__layer_filename) + exit(1) + + layer_file = open(self.__layer_filename, "r") + for line in layer_file: + layer_data = line.strip().split(',') + layer_name = layer_data[0] + + tensor_layer = defaultdict(str) + tensor_layer["Name"] = layer_name + + if Driver.is_conv(layer_name): + tensor_layer["N"] = float(layer_data[1]) + tensor_layer["Cin"] = float(layer_data[2]) + tensor_layer["H"] = float(layer_data[3]) + tensor_layer["W"] = float(layer_data[4]) + tensor_layer["Cout"] = float(layer_data[5]) + tensor_layer["Kh"] = float(layer_data[7]) + tensor_layer["Kw"] = float(layer_data[8]) + tensor_layer["Sh"] = float(layer_data[9]) + tensor_layer["Sw"] = float(layer_data[10]) + + elif Driver.is_fc(layer_name): + tensor_layer["RA"] = float(layer_data[1]) + tensor_layer["CA"] = float(layer_data[2]) + tensor_layer["RB"] = float(layer_data[3]) + tensor_layer["CB"] = float(layer_data[4]) - # Go through all data items (each col element) per operation - for i in range(len(col_names)): - operation_data[col_names[i]] = float(op_data[i + 1]) + elif not Driver.is_nml(layer_name): # TODO should we store data for NMLs? + print("ERROR: Invalid layer name %s" % layer_name) + exit(1) - layer_operations.append(operation_data) + self.__tensor_layers.append(tensor_layer) + layer_file.close() - tensor_table[layer_name] = layer_operations + + def __parse_tensor_table(self): + if not os.path.isfile(self.__table_filename): + print("ERROR: %s was not found." % self.__table_filename) + exit(1) + table_file = open(self.__table_filename, "r") line = table_file.readline().strip() - table_file.close() - -fp16_swing = 8 - -class ApproxTypes: - FP16 = 0 - FP32 = 1 - PROMISE = 2 - -def get_approx_type(approx_type): - if approx_type == 0: - return "fp16" - elif approx_type == 1: - return "fp32" - return "promise" - -def is_promise(config_layer): - # TODO overhead in call to split? 
- return float(config_layer.split(' ')[0]) < fp16_swing - - -def quantize(curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): - #print(get_approx_type(curr_layer), get_approx_type(prev_layer)) - if curr_layer == prev_layer or curr_layer == ApproxTypes.PROMISE \ - or prev_layer == ApproxTypes.PROMISE: # No quantization needed - return 0.0, 0.0 - - layer_name = layer_data["Name"] - - # NOTE: Ignoring logic where curr == promise or prev == promise bc - # smartDMA is always true so we'd return near the beginning of the method - - # Get h2f/f2h data using the first tensor operation in the layer - # (which is why order matters in the tensor table) - tensor_op_row = tensor_table[layer_name][h2f_f2h_operation_ind] - if curr_layer == ApproxTypes.FP32: - time = tensor_op_row["h2f_time"] - energy = tensor_op_row["h2f_energy"] - elif curr_layer == ApproxTypes.FP16: - time = tensor_op_row["f2h_time"] - energy = tensor_op_row["f2h_energy"] - - print("Quantization: (%f, %f)" % (time, energy)) - return (time, energy) - -def run_promise_simulation(swing, layer_data): - layer_name = layer_data["Name"] - patch_factor = 1 - - if is_conv(layer_name): - rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ - / (layer_data["Sh"] * layer_data["Sw"]) - cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] - rows_b = cols_a - cols_b = layer_data["Cout"] - patch_factor = layer_data["Kh"] * layer_data["Kw"] - elif is_fc(layer_name): - rows_a = layer_data["RA"] - cols_a = layer_data["CA"] - rows_b = cols_a - cols_b = layer_data["CB"] - else: - print("PROMISE can't run whatever this layer is.") - exit(1) - #print("[%f x %f] x [%f x %f] : %f" % (rows_a, cols_a, rows_b, cols_b, swing)) - # Run promise simulator - # TODO need to print time and energy in the ptm runner so we can pipe it - output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ - str(cols_b), str(patch_factor), str(swing)], \ - stdout = subprocess.PIPE, stderr = 
subprocess.PIPE).communicate()[0] - total_time_energy = output.strip().split(',') - - assert(len(total_time_energy) == 2) - print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1])) - return float(total_time_energy[0]), float(total_time_energy[1]) - - -def run_gpu_simulation(curr_layer, layer_name, tensor_ind): - tensor_info = tensor_table[layer_name][tensor_ind] - if curr_layer == ApproxTypes.FP32: - conversion_time = tensor_info["fp32_time"] - conversion_energy = tensor_info["fp32_energy"] - else: - conversion_time = tensor_info["fp16_time"] - conversion_energy = tensor_info["fp16_energy"] - print("GPU: (%f, %f)" % (conversion_time, conversion_energy)) - return (conversion_time, conversion_energy) - -# Default dict of default dicts -results_time_key = "Time" -results_energy_key = "Energy" -# [Time/Energy][number corresponding to order the layer config was read in] = time/energy -aggregate_results = defaultdict(lambda: defaultdict(float)) -config_count = 0 - -def run_simulations(config_filename): - global config_count - - if not os.path.isfile(config_filename): - print("ERROR: %s was not found" % config_filename) - exit(1) - config_file = open(config_filename, "r") - - # each line = indepedent configuration - # layers are separated by commas - # tensor ops are separated by spaces - for config in config_file: - config_layers = config.strip().split(',') - prev_layer = ApproxTypes.FP32 - curr_layer = None - - for layer_ind, config_layer in enumerate(config_layers): # level - layer_data = tensor_layers[layer_ind] # layer - layer_name = layer_data["Name"] - - if is_promise(config_layer): - print("Running layer %s on PROMISE" % layer_name) - curr_layer = ApproxTypes.PROMISE - quant_time, quant_energy = quantize(curr_layer, prev_layer, 0, layer_data) - # Compute - time, energy = run_promise_simulation(config_layer, layer_data) - print(time, energy) - aggregate_results[results_time_key][config_count] += time - 
aggregate_results[results_energy_key][config_count] += energy - else: - print("Running layer %s on the GPU" % layer_name) - tensor_ops = config_layer.split(' ') - - total_time = 0 - total_energy = 0 - for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevle - tensor_op = int(tensor_op) - if tensor_op == fp16_swing: - curr_layer = ApproxTypes.FP16 - else: - curr_layer = ApproxTypes.FP32 - quant_time, quant_energy = quantize(curr_layer, prev_layer, tensor_ind, \ - layer_data) - conv_time, conv_energy = run_gpu_simulation(curr_layer, layer_name, tensor_ind) - total_time += quant_time + conv_time - total_energy += quant_energy + conv_energy - - aggregate_results[results_time_key][config_count] += total_time - aggregate_results[results_energy_key][config_count] += total_energy - - prev_layer = curr_layer - config_count += 1 - print("\n") - config_file.close() - - -def display_results(results_filename): - results_file = open(results_filename, "w") - attributes_to_print = [results_time_key, results_energy_key] - - for attribute in attributes_to_print: - results_file.write("%s\n" % attribute) - results_file.write("Configuration,Total,Improvement\n") - - baseline_val = aggregate_results[attribute][0] - print(baseline_val) - best_config = None - best_result = None - - for config_ind in range(config_count): - results_file.write("c%d" % config_ind) - time_or_energy_val = aggregate_results[attribute][config_ind] - results_file.write(",%f" % time_or_energy_val) - results_file.write(",%f\n" % (baseline_val / (time_or_energy_val + 0.0001))) - - if not best_result or time_or_energy_val < best_result: - best_result = time_or_energy_val - best_config = config_ind - results_file.write("\nc%d,%f\n\n" % (best_config, aggregate_results[attribute][best_config])) - results_file.close() - -def driver(tensor_layers_file, tensor_table_file, conf_file, output_file): - print(tensor_layers_file, tensor_table_file, conf_file, output_file) - parse_tensor_layer_file(tensor_layers_file) - 
parse_tensor_table(tensor_table_file) - run_simulations(conf_file) - display_results(output_file) + while line: + # Line here MUST be a header or there's a bug + # Get the description of the layer + assert(line.startswith("**")) + + header_contents = line.split(' ')[1:] + layer_name = header_contents[0] + num_ops = int(header_contents[1]) + col_names = header_contents[2:] + + layer_operations = [] + + # Go through all operations in the layer + for op_count in range(num_ops): + operation_data = defaultdict(str) + + line = table_file.readline().strip() + op_data = line.split(' ') + op_name = op_data[0] + operation_data["Name"] = op_name + + # Number of data items (#s) needs to match up with the # of cols + assert(len(op_data) - 1 == len(col_names)) + + # Go through all data items (each col element) per operation + for i in range(len(col_names)): + operation_data[col_names[i]] = float(op_data[i + 1]) + + layer_operations.append(operation_data) + + self.__tensor_table[layer_name] = layer_operations + line = table_file.readline().strip() + table_file.close() + + + @staticmethod + def is_promise(config_layer): + return float(config_layer.split(' ')[0]) < Driver.fp16_swing + + + def __quantize(self, curr_layer, prev_layer, h2f_f2h_operation_ind, layer_data): + if curr_layer == prev_layer or curr_layer == Driver.ApproxTypes.PROMISE \ + or prev_layer == Driver.ApproxTypes.PROMISE: # No quantization needed + return 0.0, 0.0 + + layer_name = layer_data["Name"] + + # NOTE: Ignoring logic where curr == promise or prev == promise bc + # smartDMA is always true so we'd return near the beginning of the method + + # Get h2f/f2h data using the first tensor operation in the layer + # (which is why order matters in the tensor table) + tensor_op_row = self.__tensor_table[layer_name][h2f_f2h_operation_ind] + if curr_layer == Driver.ApproxTypes.FP32: + time = tensor_op_row["h2f_time"] + energy = tensor_op_row["h2f_energy"] + elif curr_layer == Driver.ApproxTypes.FP16: + time = 
tensor_op_row["f2h_time"] + energy = tensor_op_row["f2h_energy"] + + print("Quantization: (%f, %f)" % (time, energy)) + return (time, energy) + + + def __run_promise_simulation(self, swing, layer_data): + layer_name = layer_data["Name"] + patch_factor = 1 + + if Driver.is_conv(layer_name): + rows_a = layer_data["N"] * layer_data["H"] * layer_data["W"] \ + / (layer_data["Sh"] * layer_data["Sw"]) + cols_a = layer_data["Cin"] * layer_data["Kh"] * layer_data["Kw"] + rows_b = cols_a + cols_b = layer_data["Cout"] + patch_factor = layer_data["Kh"] * layer_data["Kw"] + elif Driver.is_fc(layer_name): + rows_a = layer_data["RA"] + cols_a = layer_data["CA"] + rows_b = cols_a + cols_b = layer_data["CB"] + else: + print("PROMISE can't run whatever this layer is.") + exit(1) + # Run promise simulator + # TODO need to print time and energy in the ptm runner so we can pipe it + output = subprocess.Popen(["./ptm", str(rows_a), str(cols_a), str(rows_b), \ + str(cols_b), str(patch_factor), str(swing)], \ + stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] + total_time_energy = output.strip().split(',') + + assert(len(total_time_energy) == 2) + print("PROMISE: (%s, %s)" % (total_time_energy[0], total_time_energy[1])) + return float(total_time_energy[0]), float(total_time_energy[1]) + + + def __run_gpu_simulation(self, curr_layer, layer_name, tensor_ind): + tensor_info = self.__tensor_table[layer_name][tensor_ind] + if curr_layer == Driver.ApproxTypes.FP32: + conversion_time = tensor_info["fp32_time"] + conversion_energy = tensor_info["fp32_energy"] + else: + conversion_time = tensor_info["fp16_time"] + conversion_energy = tensor_info["fp16_energy"] + print("GPU: (%f, %f)" % (conversion_time, conversion_energy)) + return (conversion_time, conversion_energy) + + + def __run_simulations(self): + print("run sim") + if not os.path.isfile(self.__config_filename): + print("ERROR: %s was not found" % self.__config_filename) + exit(1) + + config_file = 
open(self.__config_filename, "r") + + # each line = independent configuration + # layers are separated by commas + # tensor ops are separated by spaces + for config in config_file: + config_layers = config.strip().split(',') + prev_layer = Driver.ApproxTypes.FP32 + curr_layer = None + + for layer_ind, config_layer in enumerate(config_layers): # level + layer_data = self.__tensor_layers[layer_ind] # layer + layer_name = layer_data["Name"] + + if Driver.is_promise(config_layer): + print("Running layer %s on PROMISE" % layer_name) + curr_layer = Driver.ApproxTypes.PROMISE + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, 0, layer_data) + # Compute + time, energy = self.__run_promise_simulation(config_layer, layer_data) + print(time, energy) + self.__aggregate_results[Driver.results_time_key][self.__config_count] += time + self.__aggregate_results[Driver.results_energy_key][self.__config_count] += energy + else: + print("Running layer %s on the GPU" % layer_name) + tensor_ops = config_layer.split(' ') + + total_time = 0 + total_energy = 0 + for tensor_ind, tensor_op in enumerate(tensor_ops): # sublevel + tensor_op = int(tensor_op) + if tensor_op == Driver.fp16_swing: + curr_layer = Driver.ApproxTypes.FP16 + else: + curr_layer = Driver.ApproxTypes.FP32 + quant_time, quant_energy = self.__quantize(curr_layer, prev_layer, tensor_ind, layer_data) + conv_time, conv_energy = self.__run_gpu_simulation(curr_layer, layer_name, tensor_ind) + total_time += quant_time + conv_time + total_energy += quant_energy + conv_energy + + self.__aggregate_results[Driver.results_time_key][self.__config_count] += total_time + self.__aggregate_results[Driver.results_energy_key][self.__config_count] += total_energy + + prev_layer = curr_layer + self.__config_count += 1 + print("\n") + config_file.close() + + + def __display_results(self): + results_file = open(self.__results_filename, "w") + attributes_to_print = [Driver.results_time_key, Driver.results_energy_key] + + for 
attribute in attributes_to_print: + results_file.write("%s\n" % attribute) + results_file.write("Configuration,Total,Improvement\n") + + baseline_val = self.__aggregate_results[attribute][0] + print(baseline_val) + best_config = None + best_result = None + + for config_ind in range(self.__config_count): + results_file.write("c%d" % config_ind) + time_or_energy_val = self.__aggregate_results[attribute][config_ind] + + # Using repr to keep all decimal digits when writing to file + results_file.write(",%s" % repr(time_or_energy_val)) + results_file.write(",%s\n" % repr(baseline_val / (time_or_energy_val + 0.0001))) + + if not best_result or time_or_energy_val < best_result: + best_result = time_or_energy_val + best_config = config_ind + results_file.write("\nc%d,%s\n\n" % (best_config, repr(self.__aggregate_results[attribute][best_config]))) + results_file.close() + if __name__ == "__main__": if len(sys.argv) != 5: print("Usage: python driver.py <layer info> <tensor info> <configurations> <results file>") exit(1) - test_layers_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_layers.txt" - test_table_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_pldi/mobilenet_results/mobilenet_tensors.txt" - test_conf_file = "/home/nvidia/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/build_mobilenet/mobilenet_conf2.txt" - driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) + Driver(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]).driver()