diff --git a/llvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py b/llvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b7f09d92e91894d284b40cc0bd2d346c08e36c7
--- /dev/null
+++ b/llvm/projects/hpvm-tensor-rt/bin/extractQuantRange.py
@@ -0,0 +1,42 @@
+
+
+import sys
+
+
+if __name__ == "__main__":
+
+    f = open(sys.argv[1], "r")
+    f2 = open("quant_ranges.txt", "w+")
+
+    layer_line = False
+    for x in f:
+        if "ConvLayer_PROMISE" in x or "FCLayer_PROMISE" in x or layer_line == True:
+            if layer_line == True:
+                layer_line = False
+            else:
+                layer_line = True
+
+            print x
+            toks = x.split(",")
+
+            for tok in toks:
+                tok = tok.strip()
+                tok_val = ""
+                try:
+                    tok_val = float(tok)
+                    try:
+                        tok_val = int(tok)
+                    except:
+                        print (tok_val)
+                    f2.write(str(tok_val) + " ")
+                    #f2.write("tok_val = ", tok_val + " ")
+                except:
+                    continue
+
+            f2.write("\n")
+
+
+    f.close()
+    f2.close()
+
diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
index 9281773acf6f64b0a0d6b7b4a830b54dfaac6c54..7e969271c20031dab9f302b333a4f7feb0338871 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/benchmarks.py
@@ -15,7 +15,7 @@
 # Batch 20: 3-Skip levels + + 2 runs + 1500 Runs + EnergyBandSize now % of Max (Compare against Batch19
 
-batch_id = "batch200"
+batch_id = "batch201"
 
 
 class Benchmark:
   def __init__(self):
@@ -67,7 +67,8 @@ Alexnet1.loss2_result_file = "tuner_results/alexnet_cifar10/loss_2/promise_tuned
 Alexnet1.autotuner_runs = 1500
 Alexnet1.tuner_accuracy = 79.9
-Alexnet1.promise_accuracy = 79.9
+#Alexnet1.promise_accuracy = 79.9
+Alexnet1.promise_accuracy = 79.5
 Alexnet1.validation_accuracy = 79.19
 
 bench_tuner_data["alexnet_cifar10"] = Alexnet1
@@ -97,7 +98,8 @@ Alexnet2.layer_knobs = "../opentuner/data/alexnet2/knobs.txt"
 #Alexnet2.loss2_result_file = "tuner_results/alexnet2_cifar10/loss_2/promise_tuned_confs/promise_confs.txt"
 Alexnet2.autotuner_runs = 1500
 Alexnet2.tuner_accuracy = 84.19
-Alexnet2.promise_accuracy = 84.19
+#Alexnet2.promise_accuracy = 84.19
+Alexnet2.promise_accuracy = 84.8
 Alexnet2.validation_accuracy = 85.15
 
 bench_tuner_data["alexnet2_cifar10"] = Alexnet2
@@ -131,7 +133,8 @@ Alexnet3.loss2_result_file = "tuner_results/vgg16_cifar10/loss_2/promise_tuned_c
 Alexnet3.autotuner_runs = 1500
 Alexnet3.tuner_accuracy = 90.19
-Alexnet3.promise_accuracy = 90.19
+#Alexnet3.promise_accuracy = 90.19
+Alexnet3.promise_accuracy = 89.55
 Alexnet3.validation_accuracy = 89.05
 
 bench_tuner_data["vgg16_cifar10"] = Alexnet3
@@ -163,7 +166,8 @@ Alexnet4.loss2_result_file = "tuner_results/resnet18_cifar10/loss_2/promise_tune
 Alexnet4.autotuner_runs = 1500
 Alexnet4.tuner_accuracy = 89.6
-Alexnet4.promise_accuracy = 89.59
+#Alexnet4.promise_accuracy = 89.59 - 1000 images
+Alexnet4.promise_accuracy = 89.94
 Alexnet4.validation_accuracy = 89.65
 
 bench_tuner_data["resnet18_cifar10"] = Alexnet4
@@ -197,7 +201,8 @@ Alexnet5.loss1_result_file = "tuner_results/vgg_cifar100/loss_1/promise_tuned_co
 Alexnet5.loss2_result_file = "tuner_results/vgg_cifar100/loss_2/promise_tuned_confs/promise_confs.txt"
 Alexnet5.autotuner_runs = 1500
 Alexnet5.tuner_accuracy = 67.95
-Alexnet5.promise_accuracy = 66.8
+#Alexnet5.promise_accuracy = 66.8
+Alexnet5.promise_accuracy = 70.1
 Alexnet5.validation_accuracy = 68.65
 
 bench_tuner_data["vgg16_cifar100"] = Alexnet5
@@ -265,7 +270,8 @@ Alexnet7.loss1_result_file = "tuner_results/mobilenet/loss_1/batch1/promise_tune
 Alexnet7.loss2_result_file = "tuner_results/mobilenet/loss_2/batch1/promise_tuner/high_confidence/promise_confs.txt"
 Alexnet7.autotuner_runs = 1500
 Alexnet7.tuner_accuracy = 84.8
-Alexnet7.promise_accuracy = 84.8
+#Alexnet7.promise_accuracy = 84.8
+Alexnet7.promise_accuracy = 83.65
 Alexnet7.validation_accuracy = 84.4
 
 bench_tuner_data["mobilenet_cifar10"] = Alexnet7
@@ -298,7 +304,8 @@ Alexnet8.loss2_result_file = "../build_tuner/tuner_results/mobilenet_shallow/los
 Alexnet8.autotuner_runs = 1500
 Alexnet8.tuner_accuracy = 87.6
-Alexnet8.promise_accuracy = 87.59
+#Alexnet8.promise_accuracy = 87.59
+Alexnet8.promise_accuracy = 89.25
 Alexnet8.validation_accuracy = 88.5
 
 bench_tuner_data["mobilenet_shallow"] = Alexnet8
diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/buildRtConfig.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/buildRtConfig.py
index ca1772637c0c294386c894238e457edc71c01ca5..6a07ef86e53d2b4b6372e1e253611ba6f018aaad 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/buildRtConfig.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/buildRtConfig.py
@@ -138,7 +138,7 @@ def loadConfigData(result_dir, baseline_accuracy):
       config.avg_loss = baseline_accuracy - avg_accuracy
       config.speedup = speedup
       config.fname = fname
-      print ("acc = " + str(avg_accuracy) + "\n")
+      #print ("acc = " + str(avg_accuracy) + "\n")
     else:
       flag = int(x.strip())
       config.flags.append(flag)
@@ -242,7 +242,8 @@ def buildConfigStr(config, layer_desc):
 
 def dumpConfig(layer_desc, config_arrs, result_dir):
 
-  f = open(result_dir + "/tuner_confs.txt", "w+")
+
+  f = open(result_dir + "/tuner_confs_11.txt", "w+")
   it = 1
   for config in config_arrs:
@@ -274,34 +275,82 @@ def generateConf(Bench):
 
 
 
+def dumpBaselineConfs(Bench):
+
+  layer_desc = loadLayerDesc(Bench.layer_file)
+
+  f = open(Bench.base_dir + "/tuner_confs_base.txt", "w+")
+
+  f.write("+++++\n")
+  f.write("conf" + str(1) + " " + str(1) + " 0 " + str(Bench.promise_accuracy) + " " + str(0) + "\n")
+
+  config = Config()
+  flags = []
+  for i in range(Bench.num_layers):
+    flags.append(11)
+
+  config.flags = flags
+  config_str = buildConfigStr(config, layer_desc)
+
+  f.write(config_str)
+  f.write("-----\n")
+
+
+
+  f.write("+++++\n")
+  f.write("conf" + str(2) + " " + str(1.5) + " 0 " + str(Bench.promise_accuracy) + " " + str(0) + "\n")
+
+  config = Config()
+  flags = []
+  for i in range(Bench.num_layers):
+    flags.append(10)
+
+  config.flags = flags
+  config_str = buildConfigStr(config, layer_desc)
+
+  f.write(config_str)
+  f.write("-----\n")
+
+
+
+
+
 if __name__ == "__main__":
 
-  """
   Bench = bench_tuner_data["alexnet_cifar10"]
-  generateConf(Bench)
-
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["alexnet2_cifar10"]
-  generateConf(Bench)
-
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["vgg16_cifar10"]
-  generateConf(Bench)
-
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["vgg16_cifar100"]
-  generateConf(Bench)
-
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["resnet18_cifar10"]
-  generateConf(Bench)
-
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["lenet_keras"]
-  generateConf(Bench)
-
-  """
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
   Bench = bench_tuner_data["mobilenet_cifar10"]
-  generateConf(Bench)
+  #generateConf(Bench)
+  dumpBaselineConfs(Bench)
 
-  #Bench = bench_tuner_data["mobilenet_shallow"]
+  Bench = bench_tuner_data["mobilenet_shallow"]
   #generateConf(Bench)
+  dumpBaselineConfs(Bench)
+
diff --git a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
index 8f2ded4680af9351fa4e3b571d16eb3725316af1..73d460be0c4091067c9d52e07ea7f4d421765ff3 100644
--- a/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
+++ b/llvm/projects/hpvm-tensor-rt/bin/tuner_src/run_autotuner.py
@@ -264,6 +264,9 @@ def runSensAnalysis():
 
 def runAlgoTuner():
 
+  Bench = bench_tuner_data["alexnet_cifar10"]
+  runAlgoBench(Bench)
+
   Bench = bench_tuner_data["mobilenet_shallow"]
   runAlgoBench(Bench)
 
@@ -273,14 +276,12 @@ def runAlgoTuner():
   Bench = bench_tuner_data["vgg16_cifar10"]
   runAlgoBench(Bench)
 
-  Bench = bench_tuner_data["lenet_keras"]
-  runAlgoBench(Bench)
+  #Bench = bench_tuner_data["lenet_keras"]
+  #runAlgoBench(Bench)
 
   Bench = bench_tuner_data["alexnet2_cifar10"]
   runAlgoBench(Bench)
 
-  Bench = bench_tuner_data["alexnet_cifar10"]
-  runAlgoBench(Bench)
 
   Bench = bench_tuner_data["vgg16_cifar100"]
   runAlgoBench(Bench)
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
index 6074dacf3f56e672ac5ca80eda572a53a58f1044..66e824f6d098434e140d764edda7cdacd11e110f 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet2_promise.cc
@@ -30,7 +30,7 @@ int main(int argc, char* argv[]){
   }
 
 
-  llvm_hpvm_initTensorRt(1);
+  llvm_hpvm_initTensorRt(0);
 
   int missed = 0;
   for (int i = 0 ; i < total_runs; i++){
@@ -41,7 +41,7 @@ int main(int argc, char* argv[]){
 
     startMemTracking();
 
-    int test_input_size = 1000;
+    int test_input_size = 2000;
     int batch_size = 1000;
     int offset = 5000;
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc
index 0513723b5a4a36984e736b94ee82b9fc3fb2d1f9..6b951cffcaf142bd917abc7f7c04a2c691c472d7 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/alexnet_promise.cc
@@ -31,9 +31,9 @@ int main(int argc, char* argv[]){
   }
 
 
-  llvm_hpvm_initTensorRt(1);
-
+  llvm_hpvm_initTensorRt(0);
+
   int missed = 0;
   for (int i = 0 ; i < total_runs; i++){
@@ -43,15 +43,15 @@ int main(int argc, char* argv[]){
 
     startMemTracking();
 
-    int test_input_size = 1000;
+    int test_input_size = 2000;
     int batch_size = 1000;
     int offset = 5000;
 
     int batch_count = test_input_size / batch_size;
     float final_accuracy = 0.0;
 
-    for(int i = 0; i < batch_count; i++){
-
-      std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/");
+    for(int i = 0; i < batch_count; i++){
+
+      std::string dir_prefix = std::string("../model_params/alexnet_cifar10_test/");
       std::string input_path = dir_prefix + std::string("input.bin");
       std::string labels_path = dir_prefix + std::string("labels.bin");
       std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
@@ -79,6 +79,7 @@ int main(int argc, char* argv[]){
       std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
       void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);
 
+
       int start = i * batch_size + offset;
       int end = (i + 1) * batch_size + offset;
@@ -117,3 +118,4 @@ int main(int argc, char* argv[]){
 
   return 0;
 }
+
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
index 1cf73cd92a39a14c6a1fdd3965e63bfabee634b1..052809f29b9d89534005e56125e66c5e4a0bd1cf 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_promise.cc
@@ -43,8 +43,10 @@ int main(int argc, char* argv[]){
 
    startMemTracking();
 
-    int test_input_size = 1000;
-    int batch_size = 1000;
+    int test_input_size = 2000;
+    int batch_size = 1000;
+    int offset = 5000;
+
    int batch_count = test_input_size / batch_size;
    float final_accuracy = 0.0;
 
@@ -330,93 +332,93 @@ int main(int argc, char* argv[]){
      void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);
 
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
+      int start = i * batch_size + offset;
+      int end = (i + 1) * batch_size + offset;
 
      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32);
 
      void* var_0 = ConvLayer_PROMISE(input, -1.9892114, 2.126797, conv2d_1_w, -2.196306920051575, 1.347581704139706, NULL, 0, 0, 1, 1, 1, 1, -1, 0, -1, -60.89275047302246, 51.99256916046146, 9);
-      void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001);
+      void* var_1 = tensorHalfBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001);
      void* var_2 = tensorRelu(var_1);
      void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32);
-      void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001);
+      void* var_4 = tensorHalfBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001);
      void* var_5 = tensorRelu(var_4);
      void* var_6 = ConvLayer_PROMISE(var_5, 0.0, 5.713541553974245, conv2d_2_w, -0.9317721160650253, 1.0774258937835774, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.518589503288269, 6.810842518806449, 9);
-      void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001);
+      void* var_7 = tensorHalfBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001);
      void* var_8 = tensorRelu(var_7);
      void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64);
-      void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001);
+      void* var_10 = tensorHalfBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001);
      void* var_11 = tensorRelu(var_10);
      void* var_12 = ConvLayer_PROMISE(var_11, 0.0, 4.932139402866376, conv2d_3_w, -0.5316544661521911, 0.5753790403604531, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.482631235122681, 3.96730119752885, 9);
-      void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001);
+      void* var_13 = tensorHalfBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001);
      void* var_14 = tensorRelu(var_13);
      void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128);
-      void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001);
+      void* var_16 = tensorHalfBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001);
      void* var_17 = tensorRelu(var_16);
      void* var_18 = ConvLayer_PROMISE(var_17, 0.0, 4.103263397693674, conv2d_4_w, -0.36234098821878435, 0.4076913900375366, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.04261828327179, 3.88677932929993, 9);
-      void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001);
+      void* var_19 = tensorHalfBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001);
      void* var_20 = tensorRelu(var_19);
      void* var_21 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128);
-      void* var_22 = tensorBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001);
+      void* var_22 = tensorHalfBatchNorm(var_21, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001);
      void* var_23 = tensorRelu(var_22);
      void* var_24 = ConvLayer_PROMISE(var_23, 0.0, 5.383221302509475, conv2d_5_w, -0.3131200549006462, 0.29357679939270065, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -5.921469215393066, 4.338679324150087, 9);
-      void* var_25 = tensorBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001);
+      void* var_25 = tensorHalfBatchNorm(var_24, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001);
      void* var_26 = tensorRelu(var_25);
      void* var_27 = tensorConvolution(var_26, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256);
-      void* var_28 = tensorBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001);
+      void* var_28 = tensorHalfBatchNorm(var_27, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001);
      void* var_29 = tensorRelu(var_28);
      void* var_30 = ConvLayer_PROMISE(var_29, 0.0, 4.316738154411368, conv2d_6_w, -0.23299247801303866, 0.2580290257930756, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.207789947509766, 3.932436970710759, 9);
-      void* var_31 = tensorBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001);
+      void* var_31 = tensorHalfBatchNorm(var_30, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001);
      void* var_32 = tensorRelu(var_31);
      void* var_33 = tensorConvolution(var_32, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256);
-      void* var_34 = tensorBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001);
+      void* var_34 = tensorHalfBatchNorm(var_33, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001);
      void* var_35 = tensorRelu(var_34);
      void* var_36 = ConvLayer_PROMISE(var_35, 0.0, 5.830408106803901, conv2d_7_w, -0.20233777219057084, 0.18998308175802117, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -6.298286915779113, 4.848135117530843, 9);
-      void* var_37 = tensorBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001);
+      void* var_37 = tensorHalfBatchNorm(var_36, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001);
      void* var_38 = tensorRelu(var_37);
      void* var_39 = tensorConvolution(var_38, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512);
-      void* var_40 = tensorBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001);
+      void* var_40 = tensorHalfBatchNorm(var_39, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001);
      void* var_41 = tensorRelu(var_40);
      void* var_42 = ConvLayer_PROMISE(var_41, 0.0, 4.446417809963227, conv2d_8_w, -0.17442735651135444, 0.17695830866694454, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.347910885810852, 3.6144364695549145, 9);
-      void* var_43 = tensorBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001);
+      void* var_43 = tensorHalfBatchNorm(var_42, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001);
      void* var_44 = tensorRelu(var_43);
      void* var_45 = tensorConvolution(var_44, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512);
-      void* var_46 = tensorBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001);
+      void* var_46 = tensorHalfBatchNorm(var_45, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001);
      void* var_47 = tensorRelu(var_46);
      void* var_48 = ConvLayer_PROMISE(var_47, 0.0, 4.518095604896667, conv2d_9_w, -0.14546796187758446, 0.15256431668996823, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.0287702755928043, 2.9487365779876953, 9);
-      void* var_49 = tensorBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001);
+      void* var_49 = tensorHalfBatchNorm(var_48, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001);
      void* var_50 = tensorRelu(var_49);
      void* var_51 = tensorConvolution(var_50, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512);
-      void* var_52 = tensorBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001);
+      void* var_52 = tensorHalfBatchNorm(var_51, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001);
      void* var_53 = tensorRelu(var_52);
      void* var_54 = ConvLayer_PROMISE(var_53, 0.0, 6.348575634956407, conv2d_10_w, -0.13025874522328376, 0.13558243343234128, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.2293100805282595, 3.5315046372413645, 9);
-      void* var_55 = tensorBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001);
+      void* var_55 = tensorHalfBatchNorm(var_54, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001);
      void* var_56 = tensorRelu(var_55);
      void* var_57 = tensorConvolution(var_56, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512);
-      void* var_58 = tensorBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001);
+      void* var_58 = tensorHalfBatchNorm(var_57, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001);
      void* var_59 = tensorRelu(var_58);
      void* var_60 = ConvLayer_PROMISE(var_59, 0.0, 5.221003110408843, conv2d_11_w, -0.11900172759592534, 0.12536374783515936, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.038203780174255, 4.004009407043483, 9);
-      void* var_61 = tensorBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001);
+      void* var_61 = tensorHalfBatchNorm(var_60, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001);
      void* var_62 = tensorRelu(var_61);
      void* var_63 = tensorConvolution(var_62, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512);
-      void* var_64 = tensorBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001);
+      void* var_64 = tensorHalfBatchNorm(var_63, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001);
      void* var_65 = tensorRelu(var_64);
      void* var_66 = ConvLayer_PROMISE(var_65, 0.0, 5.732498347759442, conv2d_12_w, -0.10839721685647964, 0.11625668607652187, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -3.3111015114784244, 4.462933233261136, 9);
-      void* var_67 = tensorBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001);
+      void* var_67 = tensorHalfBatchNorm(var_66, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001);
      void* var_68 = tensorRelu(var_67);
      void* var_69 = tensorConvolution(var_68, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512);
-      void* var_70 = tensorBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001);
-      void* var_71 = tensorRelu(var_70);
+      void* var_70 = tensorHalfBatchNorm(var_69, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001);
+      void* var_71 = tensorHalfRelu(var_70);
      void* var_72 = ConvLayer_PROMISE(var_71, 0.0, 7.240498211860681, conv2d_13_w, -0.08623744961619377, 0.08859449951350662, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -4.175431394577027, 6.2043294754027345, 9);
-      void* var_73 = tensorBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001);
-      void* var_74 = tensorRelu(var_73);
+      void* var_73 = tensorHalfBatchNorm(var_72, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001);
+      void* var_74 = tensorHalfRelu(var_73);
      void* var_75 = tensorConvolution(var_74, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024);
-      void* var_76 = tensorBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001);
+      void* var_76 = tensorHalfBatchNorm(var_75, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001);
      void* var_77 = tensorRelu(var_76);
      void* var_78 = ConvLayer_PROMISE(var_77, 0.0, 7.813958834648251, conv2d_14_w, -0.06813025139272214, 0.07002027779817581, NULL, 0, 0, 0, 0, 1, 1, -1, 0, -1, -10.920566423416137, 2.6442912578582534, 9);
-      void* var_79 = tensorBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001);
-      void* var_80 = tensorRelu(var_79);
-      void* var_81 = tensorPooling(var_80,1,2,2,0,0,2,2);
+      void* var_79 = tensorHalfBatchNorm(var_78, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001);
+      void* var_80 = tensorHalfRelu(var_79);
+      void* var_81 = tensorHalfPooling(var_80,1,2,2,0,0,2,2);
      void* var_82 = FCLayer_PROMISE(var_81, 0.0, 2.8692066650391013, dense_1_w, -0.22301019695401192, 0.1442659378200768, dense_1_b, -0.1654396, 0.23336112, -1, -12.245949958801269, 23.80532513427739, 9);
      void* var_83 = tensorSoftmax(var_82);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
index 394ec85390aa4248fd93aefa339ff196f39a5559..42d26d34e65939b410143485a61f23e705906bfc 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/mobilenet_shallow_promise.cc
@@ -42,8 +42,10 @@ int main(int argc, char* argv[]){
 
    startMemTracking();
 
-    int test_input_size = 1000;
-    int batch_size = 1000;
+    int test_input_size = 2000;
+    int batch_size = 1000;
+    int offset = 5000;
+
    int batch_count = test_input_size / batch_size;
    float final_accuracy = 0.0;
 
@@ -188,8 +190,8 @@ int main(int argc, char* argv[]){
      void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);
 
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
+      int start = i * batch_size + offset;
+      int end = (i + 1) * batch_size + offset;
 
      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
index cc0981dc7d1d75ce56388f3135fa0f89f8c688e3..0e5cdd1d284e6c7621cd3331b924c06969be79db 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/resnet18_promise.cc
@@ -30,7 +30,7 @@ int main(int argc, char* argv[]){
  }
 
 
-  llvm_hpvm_initTensorRt(1);
+  llvm_hpvm_initTensorRt(0);
 
  int missed = 0;
  for (int i = 0 ; i < total_runs; i++){
@@ -41,9 +41,10 @@ int main(int argc, char* argv[]){
 
    startMemTracking();
 
-    int test_input_size = 1000;
+    int test_input_size = 2000;
    int batch_size = 1000;
    int offset = 5000;
+
    int batch_count = test_input_size / batch_size;
    float final_accuracy = 0.0;
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
index ec5de9a5e2c2d66be44fdd99b83dd634d8f5b2f9..33c68eae84a075f50b2bc8e7484036c54ade5620 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar100_promise.cc
@@ -31,7 +31,7 @@ int main(int argc, char* argv[]){
  }
 
 
-  llvm_hpvm_initTensorRt(1);
+  llvm_hpvm_initTensorRt(0);
 
 
  int missed = 0;
@@ -43,8 +43,10 @@ int main(int argc, char* argv[]){
 
    startMemTracking();
 
-    int test_input_size = 1000;
-    int batch_size = 1000;
+    int test_input_size = 2000;
+    int batch_size = 1000;
+    int offset = 5000;
+
    int batch_count = test_input_size / batch_size;
    float final_accuracy = 0.0;
 
@@ -115,8 +117,9 @@ int main(int argc, char* argv[]){
      void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,100,1,1);
 
 
-      int start = i * batch_size;
-      int end = (i + 1) * batch_size;
+      int start = i * batch_size + offset;
+      int end = (i + 1) * batch_size + offset;
+
 
      void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32);
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
index 798b5f67aa9636f8e7ad3b9d08b9fc8e53cb137d..ff767235e9d44139f97ad885aa89eef1c385ad33 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/promise/vgg16_cifar10_promise.cc
@@ -29,7 +29,7 @@ int main(int argc, char* argv[]){
    to_skip = atoi(argv[3]);
  }
 
-  llvm_hpvm_initTensorRt(1);
+  llvm_hpvm_initTensorRt(0);
 
  int missed = 0;
  for (int i = 0 ; i < total_runs; i++){
@@ -40,7 +40,7 @@ int main(int argc, char* argv[]){
 
    startMemTracking();
 
-    int test_input_size = 1000;
+    int test_input_size = 2000;
    int batch_size = 1000;
    int offset = 5000;
diff --git a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/algo_tuner.py b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/algo_tuner.py
index 0d2f1ff481258b7d2605e98468cf6ebd66bffd64..b8145e179893bc0db2631cf1f7ee0f11bcc9be0e 100644
--- a/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/algo_tuner.py
+++ b/llvm/projects/hpvm-tensor-rt/opentuner/autotuner/algo_tuner.py
@@ -132,6 +132,7 @@ def readKnobConfig(file_path):
 
 def getConfigCost(cfg):
 
+  orig_cost = 0.0
   total_cost = 0.0
   for it in range(tunerData.num_layers):
     flag = tunerData.tuning_flags[it]
@@ -140,9 +141,27 @@ def getConfigCost(cfg):
     speedup = tunerData.knobs_speedup[flag_value]
 
     total_cost += (op_cost * 1.0 / speedup * 1.0)
-    it += 1
+    orig_cost += op_cost
 
-  return total_cost
+    it += 1
+
+  speedup = (orig_cost * 1.0) / (total_cost * 1.0)
+
+  return total_cost, speedup
+
+
+
+def appendTopLine(f_path, accuracy, total_runs, total_comps, speedup):
+
+  f_str = open(f_path, "r").read()
+
+  f_out = open(f_path, "w+")
+
+  f_out.write("total_runs=" + str(total_runs) + "\tconfidence=100.0" + "\tavg_accuracy=" + str(accuracy) + "\tconfig_cost=" + str(total_comps) + "\tspeedup=" + str(speedup) + "\n" )
+  f_out.write(f_str)
+
+  f_out.close()
+
@@ -213,7 +232,7 @@ class ClangFlagsTuner(MeasurementInterface):
 
     accuracy = getAccuracy("final_accuracy")
 
     # getConfigCost returns the cost associated with the selected configuration
-    total_comps = getConfigCost(cfg)
+    total_comps, speedup = getConfigCost(cfg)
 
     Result = opentuner.resultsdb.models.Result()
@@ -226,7 +245,10 @@ class ClangFlagsTuner(MeasurementInterface):
     if min_accuracy > tunerData.accuracy_threshold:
       config_tuple = (total_comps, accuracy, cfg)
       self.configs_list.append(config_tuple)
-      shutil.copy('promise_flags', tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id))
+      f_path = tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id)
+      shutil.copy('promise_flags', f_path)
+
+      appendTopLine(f_path, accuracy, total_runs, total_comps, speedup)
 
     f_acc = open(tunerData.output_dir + '/' + tunerData.binary_path + '_' + str(tunerData.test_id) + "_accuracy", "w")
     f_acc.write(str(accuracy))
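
Context for the `algo_tuner.py` hunks above: `getConfigCost` now also reports an estimated speedup (unmodified cost divided by knob-scaled cost), and `appendTopLine` prepends that summary as a tab-separated header line to each saved configuration file. The following is a minimal, self-contained sketch of the same arithmetic and header format; `op_costs`, `knob_speedups`, and `flags` are made-up stand-ins for the tuner's `tunerData` state, not its actual API:

# Illustrative stand-ins for tunerData state (hypothetical values).
op_costs = [120.0, 340.0, 210.0]            # per-layer operation costs
knob_speedups = {11: 1.0, 10: 1.5, 9: 2.1}  # knob id -> per-op speedup factor
flags = [11, 10, 9]                         # chosen knob for each layer

orig_cost = sum(op_costs)                   # cost with no approximation knobs
total_cost = sum(cost / knob_speedups[knob]
                 for cost, knob in zip(op_costs, flags))
speedup = orig_cost / total_cost            # estimated whole-config speedup

# Header line in the same tab-separated format that appendTopLine prepends:
top_line = ("total_runs=2\tconfidence=100.0\tavg_accuracy=84.2"
            "\tconfig_cost=" + str(total_cost) +
            "\tspeedup=" + str(speedup))
print(top_line)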