diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt new file mode 100644 index 0000000000000000000000000000000000000000..01f40077d4f8342479d1965551af2d7e30a4c3f2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/layers.txt @@ -0,0 +1,13 @@ +conv add tanh +conv add tanh pool +conv add tanh +conv add tanh pool +conv add tanh +conv add tanh pool +dense add +reduce +conv +conv +conv +reduce +reduce diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt new file mode 100644 index 0000000000000000000000000000000000000000..80ff2706a43e33b81af6d47e96f702efdfcb21b3 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet2_canny_hpvm/op_cost.txt @@ -0,0 +1,13 @@ +468.076 +947.434 +255.422 +348.769 +256.658 +1.05427 +1.05427 +107.5062 +666.888 +432.622 +252.458 +11.51922 +2.01168 diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2bf962cd60722978b3205adca9c5822e59fc603 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/layer_composition.txt @@ -0,0 +1,8 @@ +conv add activation pool +conv add activation pool +conv add activation +conv add activation +conv add activation pool +dense add activation +dense add activation +dense add diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec3b8b5f375673e659594dca7ad8fd8ef6ace435 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/alexnet_imagenet/op_cost.txt @@ -0,0 +1,8 @@ +1457111.000000 +4478976.000000 +2242805.750000 +2990407.750000 +1993605.125000 +754974.750000 +335544.312500 +81920.000000 diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt index 0a5aeb2cb5e58bc3a7e6c37205d841c17889dfb9..ee2cd80cb6e33da5e97ffe2e842644d7a705cdff 100644 --- a/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt @@ -18,6 +18,24 @@ perf,135 4,1,0 1.33 tensorConvolution tensorConvApprox dev conv perf,136 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv perf,137 4,1,2 1.33 tensorConvolution tensorConvApprox dev conv perf,138 4,1,3 1.33 tensorConvolution tensorConvApprox dev conv +perf_fp16,151 1,2,0 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,152 1,2,1 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,153 2,1,0 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,154 2,1,1 3.0 tensorConvolution tensorConvApprox install conv +perf_fp16,155 1,3,0 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,156 1,3,1 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,157 1,3,2 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,158 3,1,0 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,159 3,1,1 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,160 3,1,2 2.25 tensorConvolution tensorConvApprox install conv +perf_fp16,161 1,4,0 2.0 
tensorConvolution tensorConvApprox install conv +perf_fp16,162 1,4,1 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,163 1,4,2 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,164 1,4,3 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,165 4,1,0 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,166 4,1,1 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,167 4,1,2 2.0 tensorConvolution tensorConvApprox install conv +perf_fp16,168 4,1,3 2.0 tensorConvolution tensorConvApprox install conv samp,231 2,0,1 2.0 tensorConvolution tensorConvApprox dev conv samp,232 2,1,1 2.0 tensorConvolution tensorConvApprox dev conv samp,233 3,0,1 1.5 tensorConvolution tensorConvApprox dev conv @@ -27,6 +45,15 @@ samp,236 4,0,1 1.33 tensorConvolution tensorConvApprox dev conv samp,237 4,1,1 1.33 tensorConvolution tensorConvApprox dev conv samp,238 4,2,1 1.33 tensorConvolution tensorConvApprox dev conv samp,239 4,3,1 1.33 tensorConvolution tensorConvApprox dev conv +samp_fp16,261 2,0,1 3.0 tensorConvolution tensorConvApprox install conv +samp_fp16,262 2,1,1 3.0 tensorConvolution tensorConvApprox install conv +samp_fp16,263 3,0,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,264 3,1,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,265 3,2,1 2.25 tensorConvolution tensorConvApprox install conv +samp_fp16,266 4,0,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,267 4,1,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,268 4,2,1 2.0 tensorConvolution tensorConvApprox install conv +samp_fp16,269 4,3,1 2.0 tensorConvolution tensorConvApprox install conv red_samp,41 1 1.5 tensorReduction tensorReduction dev red red_samp,42 1 2.25 tensorReduction tensorReduction dev red red_samp,43 1 1.4 tensorReduction tensorReduction dev red diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt new file mode 100644 index 0000000000000000000000000000000000000000..a93fac1daed00254fca84258bc92e7788390fd93 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/layers.txt @@ -0,0 +1,81 @@ +conv +batchnorm +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +depthwise_conv +batchnorm +activation +conv +batchnorm +activation +dense add diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt new file mode 100644 index 0000000000000000000000000000000000000000..44d50dbe00baba66bd76bb7a0d2a9f37b8580fd4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/mobilenet_torch/op_cost.txt @@ -0,0 +1,15 @@ +44236.80078 +104857.6019 
+104857.6019 +209715.2037 +104857.6019 +209715.2037 +104857.6019 +209715.2037 +209715.2037 +209715.2037 +209715.2037 +209715.2037 +104857.6019 +209715.2037 +256.000000 diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt new file mode 100644 index 0000000000000000000000000000000000000000..6fb1aef66aaa4a02c5eb6f9282753a43c629f203 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/op_cost.txt @@ -0,0 +1,21 @@ +88473.60156 +1887436.833 +1887436.833 +1887436.833 +1887436.833 +3774873.667 +1887436.833 +26214.40046 +1887436.833 +1887436.833 +3774873.667 +1887436.833 +13107.20023 +1887436.833 +1887436.833 +3774873.667 +1887436.833 +6553.600116 +1887436.833 +1887436.833 +64.0000000 diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e51c67842656762091f2465b2824235a9959723 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/resnet18_torch/resnet_layers.txt @@ -0,0 +1,59 @@ +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +activation +conv +batchnorm +conv +batchnorm +conv +batchnorm +activation +conv +batchnorm +activation +activation +pool_mean +dense add \ No newline at end of file diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt new file mode 100644 index 0000000000000000000000000000000000000000..ef3d0ebcf7c50b8a67a7c42cc71d4b69fe21fde2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/layers.txt @@ -0,0 +1,46 @@ +conv add +batchnorm +activation +conv add +batchnorm +activation +pool +conv add +batchnorm +activation +conv add +batchnorm +activation +pool +conv add +batchnorm +activation +conv add +batchnorm +activation +conv add +batchnorm +activation +pool +conv add +batchnorm +activation +conv add +batchnorm +activation +conv add +batchnorm +activation +pool +conv add +batchnorm +activation +conv add +batchnorm +activation +conv add +batchnorm +activation +pool +pool_mean +dense add diff --git a/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt b/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt new file mode 100644 index 0000000000000000000000000000000000000000..10dc83f865f3cc4ec02e86d4ae9f689eaa143610 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/autotuner/data/vgg16_cifar10_torch/op_cost.txt @@ -0,0 +1,15 @@ +88473.60156 +1887436.833 +943718.4167 +1887436.833 +943718.4167 +1887436.833 +1887436.833 +943718.4167 +1887436.833 +1887436.833 +471859.2083 +471859.2083 +471859.2083 +13107.200195 +256.000000 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..46165302552e71fd678600da85140d33101d298e --- /dev/null +++ 
b/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/quant_ranges.txt @@ -0,0 +1,7 @@ +-1.8816435 2.0934134 -0.5421946 0.3710851 -0.06697306 0.040868897 -0.775027394891 0.779944300652 +-0.775027394891 0.779944300652 -0.42474225 0.31460348 -0.3557253 -0.17281663 -0.808667064309 0.983953297734 +-0.808667064309 0.983953297734 -0.44134507 0.79587924 -0.80424446 0.75330096 -0.995678424835 0.998566448689 +-0.995678424835 0.998566448689 -0.2883836 0.31025785 -0.6353164 0.29015934 -0.993219196796 0.992379009724 +-0.993219196796 0.992379009724 -0.2792431 0.37689754 -1.1379756 1.2391574 -0.999901354313 0.999910891056 +-0.999901354313 0.999910891056 -0.27078503 0.27942517 -0.503003 0.12762362 -0.991036117375 0.971404970288 +-0.991036117375 0.971404970288 -0.24273404 0.5845544 -0.53745 0.558251 -119.27973732 -25.2262819576 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..789a4114a5a468b3634506c4016b16b8b80c9131 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/quant_ranges.txt @@ -0,0 +1,6 @@ +-1.88164262419 2.09340954985 -0.33087718 0.3323643 -0.7782218 0.6020472 -0.978641152382 0.998945295811 +-0.978641152382 0.998945295811 -0.2095158 0.33543423 -0.45020863 0.30596754 -0.999703943729 0.999930202961 +-0.999703943729 0.999930202961 -0.1715614 0.17037082 -0.6519161 0.5939945 -0.999933600426 0.999940037727 +-0.999933600426 0.999940037727 -0.15575546 0.14456555 -0.55873865 0.4704539 -0.99999910593 0.999999344349 +-0.99999910593 0.999999344349 -0.16108225 0.16864482 -0.22135437 0.10401678 -0.999434411526 0.999634206295 +-0.999434411526 0.999634206295 -0.18183032 0.19018902 -0.07189204 0.106005594 -15.0765653801 19.4225852203 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..af4d13d6f8e6b5902ff743b07ef6875d644df91a --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/lenet_mnist/quant_ranges.txt @@ -0,0 +1,4 @@ +0 1 -1 1 -1 1 -1 1 +-1 1 -1 1 -1 1 -1 1 +-1 1 -1 1 -1 1 -1 1 +-1 1 -1 1 -1 1 -1 1 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ea66b8485dc19a8f2f9abfc5981e023f22ce521 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/mobilenet/quant_ranges.txt @@ -0,0 +1,15 @@ +-1.9892114 2.126797 -2.19630692005 1.34758170414 0.0 0.0 -60.892750473 51.9925691605 +0.0 5.71354155397 -0.931772116065 1.07742589378 0.0 0.0 -6.51858950329 6.81084251881 +0.0 4.93213940287 -0.531654466152 0.57537904036 0.0 0.0 -4.48263123512 3.96730119753 +0.0 4.10326339769 -0.362340988219 0.407691390038 0.0 0.0 -4.04261828327 3.8867793293 +0.0 5.38322130251 -0.313120054901 0.293576799393 0.0 0.0 -5.92146921539 4.33867932415 +0.0 4.31673815441 -0.232992478013 0.258029025793 0.0 0.0 -4.20778994751 3.93243697071 +0.0 5.8304081068 -0.202337772191 0.189983081758 0.0 0.0 -6.29828691578 4.84813511753 +0.0 4.44641780996 -0.174427356511 0.176958308667 0.0 0.0 -4.34791088581 3.61443646955 +0.0 4.5180956049 -0.145467961878 0.15256431669 0.0 0.0 -3.02877027559 2.94873657799 +0.0 6.34857563496 -0.130258745223 0.135582433432 0.0 0.0 -4.22931008053 3.53150463724 +0.0 
5.22100311041 -0.119001727596 0.125363747835 0.0 0.0 -4.03820378017 4.00400940704 +0.0 5.73249834776 -0.108397216856 0.116256686077 0.0 0.0 -3.31110151148 4.46293323326 +0.0 7.24049821186 -0.0862374496162 0.0885944995135 0.0 0.0 -4.17543139458 6.2043294754 +0.0 7.81395883465 -0.0681302513927 0.0700202777982 0.0 0.0 -10.9205664234 2.64429125786 +0.0 2.86920666504 -0.223010196954 0.14426593782 -0.1654396 0.23336112 -12.2459499588 23.8053251343 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..af0279b1d2980d8c8d71f20f3ef8c3f3da585699 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/resnet18_cifar10/quant_ranges.txt @@ -0,0 +1,22 @@ +-0.5500815 0.60786617 -1.0248864 1.2929907 -0.36291853 0.2533059 0.0 0.753551840782 +0.0 0.753551840782 -0.69884616 0.71849966 -0.2781147 0.45571187 0.0 1.01057458043 +0.0 1.01057458043 -0.59568167 0.7714691 -0.8602873 0.19743633 -1.84771883726 1.87930787086 +0.0 2.33981014252 -0.41976976 0.43748936 -0.7021962 0.3033103 0.0 1.04317724705 +0.0 1.04317724705 -0.46757826 0.4635873 -0.20662616 0.1778044 -0.829483509064 0.786805033684 +0.0 2.49733686686 -0.64404047 0.45383143 -0.819547 0.38550296 0.0 0.897360802293 +0.0 0.897360802293 -0.41986948 0.33654243 -0.3563013 0.22371122 -0.957150224447 0.54919362247 +0.0 2.37362146616 -0.4805263 0.50655717 -0.296758 0.7742441 0.0 3.01592136621 +0.0 3.01592136621 -0.52083415 0.45517674 -0.20242067 0.8236838 -5.2759475708 5.79733039856 +0.0 2.37362146616 -0.5338656 1.3395424 -0.20242067 0.8236838 -0.738995380998 2.33600783587 +0.0 7.07933432579 -0.34429058 0.43629733 -1.0744808 0.056708273 0.0 1.58645607233 +0.0 1.58645607233 -0.30342352 0.39493486 -0.44630566 0.6492069 -1.49672914267 1.29970229745 +0.0 7.11914063454 -0.38351893 0.45775774 -1.4733055 -0.014426912 0.0 1.52876508832 +0.0 1.52876508832 -0.25695276 0.45372736 -0.5259744 0.26591402 -1.59576894164 1.08074297309 +0.0 6.94405080318 -0.55299705 0.5443531 -0.71790683 1.2730768 0.0 10.3651468277 +0.0 10.3651468277 -0.4203967 0.48641303 -0.90653443 1.3546854 -22.372925148 17.2033731079 +0.0 6.94405080318 -0.4365755 0.84913826 -0.90653443 1.3546851 -3.66810325861 4.87814051151 +0.0 18.8401451111 -0.38657624 0.5228989 -1.2083547 0.76361173 0.0 19.1229192352 +0.0 19.1229192352 -0.40857902 0.575035 -1.8731614 1.0960501 -31.3229312897 14.8234729958 +0.0 23.7382488823 -0.33079496 0.5893278 -1.0234511 1.0016295 0.0 19.5892774963 +0.0 19.5892774963 -0.27897888 0.38280907 -2.2086356 1.0066502 -34.4416886902 20.9890329933 +0.0 10.8541981602 -1.5092047 1.0279838 -0.49379802 0.61032647 -40.9121678543 25.7082381058 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..b742502f145c535db5432c0f6a0de27ba3ed3979 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/quant_ranges.txt @@ -0,0 +1,15 @@ +-1.8816367 2.0934217 -0.53275156 0.49437004 -0.6403629 0.2490165 0.0 1.35908746719 +0.0 1.35908746719 -0.2688396 0.20639156 -0.7745511 0.82006615 0.0 2.52123117924 +0.0 2.52123117924 -0.16776876 0.14878987 -0.35283303 0.5154362 0.0 1.20119857848 +0.0 1.20119857848 -0.088948585 0.114222586 -0.30250227 0.36856708 0.0 1.03598809302 +0.0 1.03598809302 -0.07739562 0.10973293 -0.15568458 0.17634983 0.0 
0.300495595038 +0.0 0.300495595038 -0.051649556 0.05435231 -0.07395447 0.07996062 0.0 0.11490475405 +0.0 0.11490475405 -0.043513633 0.07577866 -0.06921874 0.02660573 0.0 0.16232508488 +0.0 0.16232508488 -0.033842053 0.045218028 -0.022827804 0.023845317 0.0 0.124249965735 +0.0 0.124249965735 -0.02211613 0.032084666 -0.02699063 0.03773564 0.0 0.174634486511 +0.0 0.174634486511 -0.01979376 0.034854397 -0.036107242 0.07056531 0.0 0.575175762177 +0.0 0.575175762177 -0.03452098 0.046055835 -0.051925894 0.07039055 0.0 0.771875114441 +0.0 0.771875114441 -0.025946895 0.040090334 -0.06049362 0.12658806 0.0 1.17285169065 +0.0 1.17285169065 -0.021766115 0.03315237 -0.20705001 0.117947325 0.0 2.00157693863 +0.0 2.00157693863 -0.042597745 0.046707444 -0.21937433 0.2545502 0.0 2.00236111879 +0.0 2.00236111879 -0.32550547 0.30829763 -1.1787822 1.2378151 -18.2514705467 24.1736344528 diff --git a/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt b/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e614e1664822d2ecf6fa426a7eb2fd7c362a2e7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/quant_ranges.txt @@ -0,0 +1,15 @@ +-1.7829767 1.9456929 -0.7450515 0.71249133 -1.5885142 0.275554 0.0 8.190712 +0.0 8.190712 -0.30790088 0.43504623 -1.4242363 1.2602744 0.0 19.023172 +0.0 19.023172 -0.29189092 0.26958522 -1.0527138 0.9075671 0.0 14.428051 +0.0 14.428051 -0.15521508 0.1829038 -0.845419 1.9358484 0.0 23.065294 +0.0 23.065294 -0.13149762 0.14811686 -0.7162557 1.0370971 0.0 15.165984 +0.0 15.165984 -0.06236292 0.08321518 -0.9067523 0.9922458 0.0 13.664733 +0.0 13.664733 -0.06471479 0.1024472 -0.15943134 0.7988499 0.0 19.025272 +0.0 19.025272 -0.06320205 0.08291938 -0.32540628 0.5203079 0.0 6.727217 +0.0 6.727217 -0.037707984 0.051601283 -0.25622904 0.11251946 0.0 3.2003012 +0.0 3.2003012 -0.056007143 0.09549151 -0.11591503 0.06267536 0.0 4.321189 +0.0 4.321189 -0.060094673 0.10868926 -0.105962686 0.09584572 0.0 2.936297 +0.0 2.936297 -0.034618977 0.05792674 -0.4237576 0.11035452 0.0 4.87262 +0.0 4.87262 -0.035480656 0.058295887 -0.21477045 0.14263579 0.0 10.32133 +0.0 10.32133 -0.08929961 0.11301676 -0.20798548 0.47405547 0.0 13.91 +0.0 13.91 -0.6627122 0.35539475 -1.0631907 0.9830786 -70.45701 87.34367 diff --git a/llvm/projects/pred_tuner/.gitignore b/llvm/projects/pred_tuner/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..23e6d258015162d516c02fecb0a4f87acf4fb73d --- /dev/null +++ b/llvm/projects/pred_tuner/.gitignore @@ -0,0 +1,28 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Jupyter Notebook +.ipynb_checkpoints + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Opentuner +opentuner.db/ +opentuner.log + +# Custom +.idea/ +.vscode/ +/data/ +results/ +tuner_results +tuner_results/ +*.sh +*.ipynb +logistics/ +autotuner/ diff --git a/llvm/projects/pred_tuner/LICENSE b/llvm/projects/pred_tuner/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2e229faa39851c4ddf71b0284c7e56a02dfd577a --- /dev/null +++ b/llvm/projects/pred_tuner/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 liukuang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/llvm/projects/pred_tuner/README.md b/llvm/projects/pred_tuner/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8d7a6db2bdc622e6cac73c56e443e8d3e797133c
--- /dev/null
+++ b/llvm/projects/pred_tuner/README.md
@@ -0,0 +1,93 @@
+# Autotuning with Error-predictive Proxy
+
+Performs autotuning on program approximation knobs using an error-predictive proxy in place of the original
+program, to greatly speed up autotuning while getting results comparable in quality.
+
+Work in progress.
+
+## Getting Started
+
+After finishing this readme, go to [./proxy_tuner.py](./proxy_tuner.py) to try tuning one
+model. Use this set of arguments for a start:
+
+```bash
+python proxy_tuner.py --test-limit 1000 --accuracy-drop 1.5 --accuracy-slack 2.1 \
+-o tuner_output alexnet2 autotuner/data/alexnet2
+```
+
+## Supported Programs & Approximations
+
+### Programs
+
+Currently DNNs only. Support for several image processing benchmarks is in progress.
+
+Supported DNNs:
+
+- `LeNet @ MNIST`
+
+- `AlexNet @ CIFAR-10`
+
+- `AlexNet2 @ CIFAR-10`
+
+- `VGG16 @ CIFAR-10`
+
+- `ResNet18 @ CIFAR-10`
+
+- `MobileNet @ CIFAR-10`
+
+- `VGG16 @ CIFAR-100`
+
+- `VGG16 @ ImageNet`
+
+- `ResNet50 @ ImageNet`
+
+### Approximations
+
+Currently _hardware-independent_ approximations only. Hardware-reliant approximations are in progress.
+
+Approximations: (output) perforation for convolution, kernel sampling for convolution.
+
+## Proxy Model
+
+TODO: add working principle of proxy modeling.
+
+## Autotuner
+
+We use [opentuner](http://opentuner.org/) for autotuning tasks.
+
+## Project Structure
+
+### Library
+
+- `models`: PyTorch definitions for DNN models
+
+  - `models/dataset`: Dataset loaders for both HPVM and PyTorch-standard DNN models
+
+  - `models/hpvm`: Definitions for HPVM-ported models, with customized convolution layers
+
+- `toolkit`: core code of the project, including DNN indexing / transformations / approximations. See
+  the code for details.
+
+### Entry Point
+
+- `./proxy_tuner.py`: perform autotuning for a given model, accuracy threshold, and number of iterations,
+  using a proxy model that predicts the accuracy of the approximated DNN (instead of running an inference,
+  which can be slow). See the sketch below this list.
+
+- `./run_proxy_tuner.py`: run autotuning for all models defined in `utils/tuner_postprocess/benchmarks.py` on
+  a set of 3 accuracy thresholds, and perform postprocessing such as computing the Pareto curve.
+
+  This is the right end-to-end script to use for obtaining a comprehensive set of autotuner results.
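+
+Both entry points lean on the same proxy-based evaluation: the baseline network is indexed, its
+approximation knobs are enumerated with `NetApproxSelector`, and a proxy estimator is initialized once so
+that candidate configurations can be scored with `estimate()` instead of a full inference each time. Below
+is a minimal sketch adapted from `bin/benchmark.py` in this patch; the `alexnet2_hpvm` model choice, the
+`proxy.pkl` path, and the 3.0-point accuracy threshold are illustrative values rather than fixed defaults,
+and only one estimator is co-initialized here where `bin/benchmark.py` builds several.
+
+```python
+from pathlib import Path
+
+import numpy as np
+
+from toolkit import LinearEstimator, LinearQoSEstimator, ModuleIndexer, NetApproxSelector
+from utils import init_by_name, nn_to_output, tensor_to_accuracy
+
+# Index the baseline model and enumerate its approximation knobs.
+baseline, testloader, _, shapes = init_by_name('alexnet2_hpvm')
+baseline_dag = ModuleIndexer(baseline)
+nas = NetApproxSelector(baseline_dag)
+baseline_acc = tensor_to_accuracy(nn_to_output(baseline_dag.module, testloader), testloader)
+
+def run_model(net):
+    return nn_to_output(net, testloader)
+
+def acc_crit(outputs):
+    return tensor_to_accuracy(outputs, testloader)
+
+def threshold_eval(outputs):
+    # Accept a knob if the mean accuracy drop stays within 3.0 points (illustrative threshold).
+    accs = np.array([acc_crit(x) for x in outputs])
+    return baseline_acc - accs.mean() < 3.0
+
+# One-time initialization of the proxy; profiling results are cached at the given path.
+proxy = LinearQoSEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, independent_init=False)
+LinearEstimator.coinit_estimators(nas, run_model, threshold_eval, proxy, storage=Path('proxy.pkl'))
+
+# Score one configuration (a {layer_index: approximation_id} dict) both ways.
+layer, knobs = next((k, ns) for k, ns in nas.net_approxes.items() if ns)
+config = {layer: knobs[0]}
+predicted_mean, predicted_confident = proxy.estimate(config)               # fast proxy estimate
+measured = acc_crit(run_model(nas.apply_approx_by_config(config).module))  # slow ground-truth run
+```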
+ +### Other Code + +- `tests`: runnable scripts that can be used as tests (and other actual functionalities) + +- `utils`: helper functions for library and autotuner that are generally standalone, except + + - `utils/utils.py` contains some convenient wrapper for model training, etc. that depends on the library. + +### Data + +- `autotuner/data`: descriptions for each DNN model, such as listing of layers, tunable + knobs, etc. diff --git a/llvm/projects/pred_tuner/bin/benchmark.py b/llvm/projects/pred_tuner/bin/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..92c8b2de5262469d9b752b5a2acd28db55e464a5 --- /dev/null +++ b/llvm/projects/pred_tuner/bin/benchmark.py @@ -0,0 +1,111 @@ +import gc +from time import time +from typing import Dict, Iterator, List + +import numpy +from tqdm import tqdm + +from exp import Benchmark, bench_tuner_data +from toolkit import ConfigT, LinearCombEstimator, LinearEstimator, LinearQoSEstimator, ModuleIndexer, \ + NetApproxSelector +from utils import gpu_mem_mb, init_by_name, nn_to_output, tensor_to_accuracy + + +def generate_random_configs(layer_approxes: Dict[int, List[int]], n_configs: int) -> Iterator[ConfigT]: + from numpy.random import choice + from random import randrange + all_layers = [k for k, ns in layer_approxes.items() if ns] + for _ in range(n_configs): + config = {} + n_approx_layers_ = randrange(len(all_layers) + 1) + approx_layers = choice(all_layers, n_approx_layers_, replace=False) + for layer_idx in approx_layers: + config[layer_idx] = choice(layer_approxes[layer_idx], 1)[0] + yield config + + +def time_action(action): + tt0 = time() + action() + tt1 = time() + return tt1 - tt0 + + +def mean_std_str(np_array): + return f"{np_array.mean():.7f} +- {np_array.std():.7f}" + + +def main_loop(bench, baseline_dag, testloader): + _t_baseline_inf = time() + baseline_output = nn_to_output(baseline_dag.module, testloader) + baseline_acc = tensor_to_accuracy(baseline_output, testloader) + print(f"Model accuracy: {baseline_acc}; test set size: {baseline_output.size(0)}") + t_baseline_inf = time() - _t_baseline_inf + nas = NetApproxSelector(baseline_dag) + + def acc_crit(inputs_): + return tensor_to_accuracy(inputs_, testloader) + + def threshold_eval(inputs_): + import numpy as np + accs = np.array([acc_crit(x) for x in inputs_]) + return baseline_acc - accs.mean() < 3.0 + + def run_model(net): + return nn_to_output(net, testloader) + + _t_profile = time() + pickle_path = bench.result_dir / 'proxy.pkl' + f1 = LinearCombEstimator( + nas, run_model, acc_crit, threshold_eval, 0.95, independent_init=False + ) + f2 = LinearQoSEstimator( + nas, run_model, acc_crit, threshold_eval, 0.95, independent_init=False + ) + LinearEstimator.coinit_estimators(nas, run_model, threshold_eval, f1, f2, storage=pickle_path) + t_profile = time() - _t_profile + print( + f"Baseline inference time: {t_baseline_inf:.3f} sec, predictor init time: {t_profile:.3f} sec; " + f"Predictor init time is {t_profile / t_baseline_inf:.3f} times of inference time" + ) + configs = generate_random_configs(nas.net_approxes, 30) + pbar = tqdm(configs) + times = [] + for config in pbar: + pbar.set_postfix(mem=gpu_mem_mb()) + approx = nas.apply_approx_by_config(config).module + t_inf = time_action(lambda: nn_to_output(approx, testloader)) + t_f1 = time_action(lambda: f1.estimate(config)) + t_f2 = time_action(lambda: f2.estimate(config)) + pbar.write( + f"Inference time: {t_inf:.3f} sec, predictors time: {t_f1:.3f} | {t_f2:.3f} sec" + ) + times.append([t_inf, t_f1, 
t_f2]) + gc.collect() + times = numpy.array(times) + s_inf, s0, s1 = numpy.apply_along_axis(mean_std_str, 0, times) + print(f"Result: inference time {s_inf}, predictor time: {s0} | {s1}") + print("Timing raw data:", times) + + +def main(): + for network in ( + 'alexnet_hpvm', 'alexnet2_hpvm', + 'vgg16_cifar10_hpvm', 'vgg16_cifar100_hpvm', + 'mobilenet_hpvm', + 'resnet18_hpvm', + 'lenet_hpvm', + 'vgg16_imagenet_hpvm', + 'alexnet_imagenet_hpvm', + # 'resnet50_imagenet_hpvm', + ): + bench: Benchmark = bench_tuner_data[network] + print(f"{network}: ") + baseline, testloader, _, shapes = init_by_name(network) + baseline_dag = ModuleIndexer(baseline) + main_loop(bench, baseline_dag, testloader) + gc.collect() + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/discrepancy.py b/llvm/projects/pred_tuner/bin/discrepancy.py new file mode 100644 index 0000000000000000000000000000000000000000..8be92df66ae3a2bcb2d33088bb20064404d37913 --- /dev/null +++ b/llvm/projects/pred_tuner/bin/discrepancy.py @@ -0,0 +1,53 @@ +import os +from pathlib import Path +from typing import Optional + +import matplotlib.pyplot as plt +import seaborn +import torch +from tqdm import tqdm + +from toolkit import ModuleIndexer, NetApproxSelector, StateCapturer +from utils import device, init_by_name + + +def run_concat_output_at(net_index: ModuleIndexer, testloader, layer: int) -> Optional[torch.Tensor]: + snet = StateCapturer(net_index, lambda i, x: x.clone().detach() if i == layer else None) + for inputs, targets in testloader: + inputs, targets = inputs.to(device), targets.to(device) + snet(inputs) + outputs = snet.net_state[layer] + return torch.cat(outputs) if outputs else None + + +def get_discrepancy_for(baseline, approxed, testloader, changed_layer): + baseline_output = run_concat_output_at(baseline, testloader, changed_layer) + approxed_output = run_concat_output_at(approxed, testloader, changed_layer) + assert baseline_output.shape == approxed_output.shape + tqdm.write(f"{baseline_output.size()}") + diff = baseline_output - approxed_output + diff_rel = torch.abs(diff / baseline_output).cpu() + diff_rel[torch.isnan(diff_rel)] = 0 + diff_rel[diff_rel > 10] = 10 + return diff_rel + + +def main(): + prefix = Path('results/discrepancy/resnet50_imagenet_hpvm') + os.makedirs(prefix, exist_ok=True) + baseline, testloader, _, shapes = init_by_name('resnet50_imagenet_hpvm') + net_index = ModuleIndexer(baseline) + nas = NetApproxSelector(net_index) + total = sum(len(ns) for ns in nas.net_approxes.values()) + for layer, approx, approxed_net_dag in tqdm(nas.apply_indep_approx(), total=total): + if approx == 11: + continue + diff_rel = get_discrepancy_for(net_index, approxed_net_dag, testloader, layer) + fig, ax = plt.subplots() + seaborn.heatmap(diff_rel.mean(0).mean(0).numpy(), ax=ax) + fig.savefig((prefix / f'{layer}_{approx}.png').open('wb'), dpi=200) + plt.close(fig) + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/filter_configs.py b/llvm/projects/pred_tuner/bin/filter_configs.py new file mode 100644 index 0000000000000000000000000000000000000000..bf23668b81ff0bdf071d27d9e010932ab07e6eea --- /dev/null +++ b/llvm/projects/pred_tuner/bin/filter_configs.py @@ -0,0 +1,54 @@ +from typing import List, Tuple + +from exp import Benchmark, ExpState, bench_tuner_data +from utils.config import Config + + +def filter_configs( + validation: List[Config], test: List[Config], + vali_threshold: float, test_threshold: float = 3.0 +) -> Tuple[List[Config], List[Config]]: + # 
Filter validation and test set by their respective thresholds + filtered_validation = [ + c for c in validation if c.avg_loss <= vali_threshold + ] + filtered_test = [ + c for c in test if c.avg_loss <= test_threshold + ] + # Test configs also need to be a subset of validation configs. + name_to_filtered = {x.fname: x for x in filtered_test} + intersect_names = set(list(name_to_filtered.keys())).intersection( + set((x.fname for x in filtered_validation)) + ) + filtered_test_ = [name_to_filtered[fname] for fname in intersect_names] + assert set([id(x) for x in filtered_test_]).issubset(set([id(x) for x in filtered_test])) + return filtered_validation, filtered_test_ + + +def process_configs(bench: Benchmark, calib_slack: float, states: ExpState): + validated_configs = states.validated_configs.configs + tested_configs = states.tested_configs.configs + old_len = len(validated_configs) + valid_configs, test_configs = filter_configs( + validated_configs, tested_configs, calib_slack + ) + states.valid_configs.finalize_dump(valid_configs) + states.test_configs.finalize_dump(test_configs) + print(f"{bench.model_name}: {old_len} -> {len(validated_configs)}, {len(tested_configs)}") + # Finalize data input and plot everything. + states.finalize_plot() + + +def main(): + for bench in bench_tuner_data.values(): + bench: Benchmark + try: + states = ExpState(bench) + except ValueError: + print(f"Model {bench.model_name} has incomplete experiment data; skipping") + continue + process_configs(bench, 2.1, states) + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/inferences.py b/llvm/projects/pred_tuner/bin/inferences.py new file mode 100644 index 0000000000000000000000000000000000000000..065abfd223f0a5c234dd36cc8aca7324415ac96f --- /dev/null +++ b/llvm/projects/pred_tuner/bin/inferences.py @@ -0,0 +1,9 @@ +from tqdm import tqdm + +from models import BaselineInfo, networks +from utils import device + +if __name__ == '__main__': + for net_name in networks: + baseline_info = BaselineInfo.init_by_name(net_name, device) + tqdm.write(f"{net_name}: {baseline_info.val_qos} (validation) {baseline_info.test_qos} (test") diff --git a/llvm/projects/pred_tuner/bin/mock_autotuner.py b/llvm/projects/pred_tuner/bin/mock_autotuner.py new file mode 100644 index 0000000000000000000000000000000000000000..ec12e1643ab319e0120f2e95c7801825f04484bb --- /dev/null +++ b/llvm/projects/pred_tuner/bin/mock_autotuner.py @@ -0,0 +1,230 @@ +import gc +import json +import os +from pathlib import Path +from sys import argv +from typing import Dict, Iterable, Iterator, List, Optional, Tuple + +import matplotlib.pyplot as plt +import numpy as np +from tqdm import tqdm, trange + +from exp import Benchmark, bench_tuner_data +from toolkit import ConfigT, LinearCombEstimator, LinearEstimator, \ + LinearQoSEstimator, ModuleIndexer, NetApproxSelector, WeightedLinearCombEstimator +from toolkit.estimators import WeightedLinearQoSEstimator +from utils import config_pylogger, gpu_mem_mb, init_by_name, nn_to_accuracy, nn_to_output, qos_stats, tensor_to_accuracy + +msg_logger = config_pylogger(output_dir=Path('tuner_results/logs'), verbose=True) + + +class Evaluator: + def __init__( + self, nas: NetApproxSelector, n_approx_layers: Optional[int], + n_configs: int, testloader, threshold: Optional[float] + ): + self.nas = nas + self.layer_approxes = nas.net_approxes + self.n_approx_layers = n_approx_layers + self.n_configs = n_configs + self.testloader = testloader + self.threshold = threshold + self.config_accs = None + + 
def generate_random_configs(self) -> Iterator[ConfigT]: + from numpy.random import choice + from random import randrange + all_layers = [k for k, ns in self.layer_approxes.items() if ns] + for _ in range(self.n_configs): + config = {} + if self.n_approx_layers is None: + n_approx_layers_ = randrange(len(all_layers) + 1) + else: + n_approx_layers_ = min(self.n_approx_layers, len(all_layers)) + approx_layers = choice(all_layers, n_approx_layers_, replace=False) + for layer_idx in approx_layers: + config[layer_idx] = choice(self.layer_approxes[layer_idx], 1)[0] + yield config + + def evaluate_config(self, config: ConfigT) -> Tuple[float, float]: + deterministic = self.nas.is_deterministic(config) + n_runs = 1 if deterministic else 30 + approxed = self.nas.apply_approx_by_config(config).module + accs = [] + for _ in trange(n_runs, leave=None): + acc = nn_to_accuracy(approxed, self.testloader) + accs.append(acc) + mean, confident_acc, _ = qos_stats(accs, 0.95) + return mean, confident_acc + + def sort_configs_by_mean_acc(self): + sorted_ = sorted(self.config_accs, key=lambda p: p[1], reverse=True) + from itertools import takewhile + if self.threshold is not None: + sorted_ = list(takewhile(lambda p: p[1] > self.threshold, sorted_)) + self.config_accs = np.array(sorted_) + + @staticmethod + def calculate_perm_dist(pred_order): + n = len(pred_order) + actual_order = np.arange(n) + return np.linalg.norm(actual_order - pred_order, ord=1) / ((n ** 2 - 1) / 3) + + def use_predictors(self, predictors: Iterable[LinearEstimator]) -> \ + Optional[List[Tuple[np.ndarray, np.ndarray]]]: + self.sort_configs_by_mean_acc() + if len(self.config_accs) == 0: + return None + configs = self.config_accs[:, 0] + raw_prediction = [] + for predictor in predictors: + # N * 2 array: avg acc, 95% confidence acc + pred_accs = np.array([ + predictor.estimate(config) for config in configs + ]) + pred_order = (-pred_accs[:, 0]).argsort(kind='stable') + raw_prediction.append((pred_accs, pred_order)) + return raw_prediction + + def run_configs(self): + configs = self.generate_random_configs() + pbar = tqdm(configs) + config_accs = [] + for config in pbar: + pbar.set_postfix(mem=gpu_mem_mb()) + mean_acc, confident_acc = self.evaluate_config(config) + config_accs.append([config, mean_acc, confident_acc]) + gc.collect() + self.config_accs = np.array(config_accs) + + +class NumpyEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.ndarray): + return obj.tolist() + return json.JSONEncoder.default(self, obj) + + +class DataPlotStorage: + def __init__(self, save_to_prefix: Path): + self.save_to = save_to_prefix + os.makedirs(self.save_to.parent, exist_ok=True) + self.args = [] + self.fig, self.axes = plt.subplots() + + def plot(self, *args, **kwargs): + self.args.append({'args': args, 'kwargs': kwargs}) + self.axes.plot(*args, **kwargs) + + def errorbar(self, *args, **kwargs): + self.args.append({'args': args, 'kwargs': kwargs}) + self.axes.errorbar(*args, **kwargs) + + def save_and_close(self): + self.fig.savefig(self.save_to.with_suffix('.png'), dpi=200) + with self.save_to.with_suffix('.json').open('w') as f: + json.dump(self.args, f, cls=NumpyEncoder) + plt.close(self.fig) + + +def compare_estimators( + eva: Evaluator, predictors: Dict[str, LinearEstimator], n_runs: int, st: DataPlotStorage +): + all_dists = [] + for _ in trange(n_runs): + eva.run_configs() + raw_predictions = eva.use_predictors(predictors.values()) + dists = [eva.calculate_perm_dist(order) for _, order in raw_predictions] + 
all_dists.append(dists) + dists_t = zip(*all_dists) + for vs, label in zip(dists_t, predictors.keys()): + st.plot(sorted(vs), label=label) + st.axes.set_ylim(bottom=0) + st.fig.legend() + st.save_and_close() + + +def plot_acc_estm_discrepancy( + eva: Evaluator, predictors: Dict[str, LinearEstimator], st: DataPlotStorage +): + eva.run_configs() + raw_predictions = eva.use_predictors(predictors.values()) + if not raw_predictions: + return + measured_mean_accs = eva.config_accs[:, 1] + yerr = measured_mean_accs - eva.config_accs[:, 2] + st.errorbar( + measured_mean_accs, measured_mean_accs, fmt='.', yerr=yerr, uplims=True, label='baseline' + ) + for (pred_accs, _), label in zip(raw_predictions, predictors.keys()): + pred_accs = pred_accs + yerr = pred_accs[:, 0] - pred_accs[:, 1] + st.errorbar( + measured_mean_accs, pred_accs[:, 0], + fmt='.', yerr=yerr, uplims=True, label=label + ) + min_x, max_x = np.min(measured_mean_accs), np.max(measured_mean_accs) + diag_x = np.linspace(min_x, max_x, 500) + st.errorbar(diag_x, diag_x, linewidth=1) + st.axes.set_xlabel('Measured accuracy (%)') + st.axes.set_ylabel('Predicted accuracy (%)') + st.fig.legend() + st.save_and_close() + + +def train_predictors(eva: Evaluator, *predictors: LinearEstimator): + for conf in eva.generate_random_configs(): + for p in predictors: + p.estimate(conf) + + +def main(): + base_path = Path(argv[1]) if len(argv) > 1 else Path('results/mock_autotuner') + + for network in ( + 'alexnet2_hpvm', 'vgg16_cifar10_hpvm', 'vgg16_cifar100_hpvm', + 'mobilenet_hpvm', + 'resnet18_hpvm', + 'vgg16_imagenet_hpvm', 'resnet50_imagenet_hpvm' + ): + bench: Benchmark = bench_tuner_data[network] + print(f"{bench.model_name}: ") + baseline, testloader, _, shapes = init_by_name(bench.model_name) + baseline_dag = ModuleIndexer(baseline) + baseline_acc = nn_to_accuracy(baseline_dag.module, testloader) + nas = NetApproxSelector(baseline_dag) + + def acc_crit(inputs_): + return tensor_to_accuracy(inputs_, testloader) + + def threshold_eval(inputs_): + accs = np.array([acc_crit(x) for x in inputs_]) + return baseline_acc - accs.mean() < 3.0 + + def run_model(net): + return nn_to_output(net, testloader) + + f1 = LinearCombEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) + f2 = LinearQoSEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) + f3 = WeightedLinearCombEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) + f4 = WeightedLinearQoSEstimator(nas, run_model, acc_crit, threshold_eval, 0.95, False) + LinearEstimator.coinit_estimators( + nas, run_model, threshold_eval, f1, f2, f3, f4, + storage=Path('model_params/pickles') / Path(bench.base_dir).name / 'proxy_dev.pkl' + ) + train_predictors(Evaluator(nas, None, 700, testloader, baseline_acc), f3, f4) + st = DataPlotStorage(base_path / "cmp_acc_diff" / f"{bench.model_name}") + plot_acc_estm_discrepancy( + Evaluator(nas, None, 200, testloader, baseline_acc - 10), + {'f1': f1, 'f2': f2, 'f3': f3, 'f4': f4}, st + ) + st = DataPlotStorage(base_path / 'cmp_ordering' / f"{bench.model_name}" / "n_none") + compare_estimators( + Evaluator(nas, None, 20, testloader, None), + {'f1': f1, 'f2': f2, 'f3': f3, 'f4': f4}, 10, st + ) + gc.collect() + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/print_approxes.py b/llvm/projects/pred_tuner/bin/print_approxes.py new file mode 100644 index 0000000000000000000000000000000000000000..c95d080326ad2e806d772454c15bed68c573ca17 --- /dev/null +++ b/llvm/projects/pred_tuner/bin/print_approxes.py @@ -0,0 
+1,35 @@ +from collections import defaultdict + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn +from tqdm import tqdm + +from models.domains import Accuracy +from models import BaselineInfo +from toolkit import NetApproxSelector +from utils import device + + +def main(): + baseline_info = BaselineInfo.init_by_name('mobilenet_hpvm', device) + nas = NetApproxSelector(baseline_info.baseline_net, dev_time_only=True, ignore_fp32=False) + table = defaultdict(dict) + pbar = tqdm(nas.list_single_approxes()) + for layer, approx, _ in pbar: + pbar.set_postfix(k=layer, i=approx) + approxed_net = nas.apply_approx_by_config({layer: approx}).module + acc: Accuracy = baseline_info.get_qos(approxed_net, baseline_info.val_loader) + table[layer][approx] = acc.to_scalar() + df = pd.DataFrame( + [pd.Series(list(d.values()), index=d.keys()) for d in table.values()], + index=list(table.keys()) + ) + with open('accuracy.json', 'w') as f: + df.to_json(f) + seaborn.heatmap(df.to_numpy()) + plt.savefig('accuracy.png', dpi=200) + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/progress_graph.py b/llvm/projects/pred_tuner/bin/progress_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7d0d5526f708e8049e3f185ebceebe68f4b778 --- /dev/null +++ b/llvm/projects/pred_tuner/bin/progress_graph.py @@ -0,0 +1,61 @@ +from itertools import groupby +from operator import itemgetter +from pathlib import Path +from typing import Tuple + +import matplotlib.pyplot as plt + +from exp import Benchmark, ExpState, batch_id, bench_tuner_data +from utils import Config + + +def finalize_figs(filename, ax, fig): + ax.legend() + ax.set_ylim(bottom=1.0) + fig.savefig(filename, dpi=200) + plt.close(fig) + + +def process_configs(bench: Benchmark, states: ExpState, shared_ax): + def get_features(c: Config) -> Tuple[int, int, float]: + *_, run_s, iter_s = c.fname.split('_') + return int(run_s), int(iter_s), c.speedup + + def get_max_speedup(group): + group = sorted(list(group), key=itemgetter(1)) + iter_max_speedup = [] + max_speedup = 0 + for _, i, speedup in group: + max_speedup = max(max_speedup, speedup) + iter_max_speedup.append((i, max_speedup)) + return iter_max_speedup + + run_iter_speedup = sorted( + [get_features(c) for c in states.all_configs.configs], key=itemgetter(0) + ) + run_groups = groupby(run_iter_speedup, key=itemgetter(0)) + fig, ax = plt.subplots() + for run, run_group in run_groups: + iter_max_speedup = get_max_speedup(run_group) + iters, max_speedups = zip(*iter_max_speedup) + ax.plot(iters, max_speedups, label=f"loss={run + 1}%") + if run + 1 == 3: + shared_ax.plot(iters, max_speedups, label=f"{bench.model_name.replace('_hpvm', '')}") + finalize_figs(bench.result_dir / f"tuner_progress.png", ax, fig) + + +def main(): + fig, ax = plt.subplots() + for bench in bench_tuner_data.values(): + bench: Benchmark + try: + states = ExpState(bench) + except ValueError: + print(f"Model {bench.model_name} has incomplete experiment data; skipping") + continue + process_configs(bench, states, ax) + finalize_figs(Path("results") / f"{batch_id}_tuner_progress.png", ax, fig) + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/bin/train_model.py b/llvm/projects/pred_tuner/bin/train_model.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d0d80725f5784c42ec8f6a26b65ff183df1649 --- /dev/null +++ b/llvm/projects/pred_tuner/bin/train_model.py @@ -0,0 +1,186 @@ +"""Train CIFAR10 with PyTorch.""" +import argparse 
+import os +from typing import List + +import numpy as np +import torch +from torch import optim +from torch.nn import CrossEntropyLoss, Module +from torch.optim.lr_scheduler import ReduceLROnPlateau +from tqdm import tqdm + +from models.torch import ResNet18 +from models.datasets import get_cifar10_train_dataloader, get_cifar10_test_dataloader +from utils import device + + +class RunningStats: + def __init__(self, criterion): + self.criterion = criterion + self.all_outputs = None + self.all_targets = np.zeros([0]) + self.avg_loss, self.correct, self.total = 0, 0, 0 + self.conf_mat = None + self.n_batches = 0 + + @property + def n_classes(self): + if self.all_outputs is None: + raise RuntimeError("Num of classes is unknown before seeing first input") + return self.all_outputs.shape[1] + + def setup_for_first_output(self, outputs): + n_classes = outputs.shape[1] + self.all_outputs = np.zeros([0, n_classes]) + self.conf_mat = np.zeros([n_classes, n_classes]) + + def add_output(self, outputs, targets): + if self.all_outputs is None: + self.setup_for_first_output(outputs) + loss = self.criterion(outputs, targets) + _, predicted = outputs.max(1) + self.avg_loss = (self.avg_loss * self.n_batches + loss.item()) / (self.n_batches + 1) + self.total += targets.size(0) + self.correct += predicted.eq(targets).sum().item() + for t, p in zip(targets, predicted): + self.conf_mat[int(t), p] += 1 + self.n_batches += 1 + outputs = outputs.clone().cpu().detach() + targets = targets.clone().cpu().detach() + self.all_outputs = np.vstack([self.all_outputs, outputs]) + self.all_targets = np.hstack([self.all_targets, targets]) + return loss + + def classwise_outputs(self) -> List[np.ndarray]: + class_outputs = [np.zeros([0, self.n_classes]) for _ in range(self.n_classes)] + for output, label_class in zip(self.all_outputs, self.all_targets): + co = class_outputs[int(label_class)] + class_outputs[int(label_class)] = np.vstack([co, output]) + return class_outputs + + @property + def acc(self): + return 100. * self.correct / self.total + + @property + def classwise_acc(self) -> List[float]: + return [self.conf_mat[i, i] / self.conf_mat[i].sum() for i in range(self.n_classes)] + + +def test(net, testloader, criterion): + net.eval() + rs = RunningStats(criterion) + with torch.no_grad(): + pbar = tqdm(enumerate(testloader), total=len(testloader)) + for batch_idx, (inputs, targets) in pbar: + inputs, targets = inputs.to(device), targets.to(device) + outputs = net(inputs) + rs.add_output(outputs, targets) + pbar.set_postfix_str( + f"Loss: {rs.avg_loss:.3f} | Acc: {rs.acc:.3f}% ({rs.correct}/{rs.total})" + ) + return rs + + +def load_torch_checkpoint(net: Module, chpt_path: str): + print('==> Loading checkpoint..') + checkpoint = torch.load(chpt_path) + net.load_state_dict(checkpoint['net']) + start_epoch = checkpoint['epoch'] + return start_epoch + + +def get_optimizer(net, lr): + return optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4) + + +class EarlyStopping: + """Early stops the training if validation loss doesn't improve after a given patience.""" + + def __init__(self, path, patience=7, delta=0): + """ + Args: + patience (int): How long to wait after last time validation loss improved. + Default: 7 + delta (float): Minimum change in the monitored quantity to qualify as an improvement. + Default: 0 + path (str): Path for the checkpoint to be saved to. 
+ Default: 'checkpoint.pt' + """ + self.patience = patience + self.counter = 0 + self.min_loss = None + self.delta = delta + self.path = path + + def __call__(self, val_loss, model, epoch): + if self.min_loss is None or val_loss < self.min_loss - self.delta: + # Improved + self.min_loss = val_loss + self.save_checkpoint(model, epoch) + self.counter = 0 + else: + self.counter += 1 + if self.counter >= self.patience: + return True + return False + + def save_checkpoint(self, model, epoch): + tqdm.write('Saving..') + state = { + 'net': model.state_dict(), + 'epoch': epoch, + } + if not os.path.isdir(os.path.dirname(self.path)): + os.makedirs(os.path.dirname(self.path)) + torch.save(state, self.path) + + +def train_one_epoch(net, trainloader, optimizer, criterion): + net.train() + rs = RunningStats(criterion) + pbar = tqdm(trainloader) + for inputs, targets in pbar: + optimizer.zero_grad() + inputs, targets = inputs.to(device), targets.to(device) + outputs = net(inputs) + loss = rs.add_output(outputs, targets) + loss.backward() + optimizer.step() + pbar.set_postfix_str( + f"Loss: {rs.avg_loss:.3f} | Acc: {rs.acc:.3f}% ({rs.correct}/{rs.total})" + ) + + +def train(net, checkpoint, output, lr): + start_epoch = load_torch_checkpoint(net, checkpoint) if checkpoint else 0 + trainloader = get_cifar10_train_dataloader('./data', 128) + testloader = get_cifar10_test_dataloader('./data', 100) + criterion = CrossEntropyLoss() + optimizer = get_optimizer(net, lr) + es = EarlyStopping(output, patience=5) + reduce_lr = ReduceLROnPlateau(optimizer, factor=0.2, patience=3, verbose=True) + for epoch in range(start_epoch + 1, start_epoch + 200): + print('\nEpoch: %d' % epoch) + train_one_epoch(net, trainloader, optimizer, criterion) + rs = test(net, testloader, criterion) + if es(rs.avg_loss, net, epoch): + print(f"Early stopped at {epoch}") + break + reduce_lr.step(rs.avg_loss) + + +def main(): + parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training') + parser.add_argument('--lr', default=0.1, type=float, help='learning rate') + parser.add_argument('--resume', '-r', type=str, help='resume from checkpoint') + parser.add_argument( + '--output', '-o', type=str, required=True, help='path to save checkpoint to' + ) + args = parser.parse_args() + train(ResNet18().to(device), args.resume, args.output, args.lr) + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/exp.py b/llvm/projects/pred_tuner/exp.py new file mode 100644 index 0000000000000000000000000000000000000000..e7457d5b475d53f7a6c05fcea28f8b1cc4507c93 --- /dev/null +++ b/llvm/projects/pred_tuner/exp.py @@ -0,0 +1,438 @@ +import abc +import json +import os +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Tuple, Type + +from torch.nn import Linear, Module +from torch.utils.data import DataLoader + +from models.domains import QoS, qos_stats +from models.hpvm import HPVMConvBundle +from models import BaselineInfo +from toolkit import LinearEstimator, NetApproxSelector +from utils import config_pylogger, get_knob_config_file, get_tensorrt_dir, device +from utils.config import Config, dump_rt_format_to, load_configs_from_dir, plot_configs + +batch_id = "batch405" +is_dev_time = False +ConfigT = Dict[int, int] +msg_logger = config_pylogger(output_dir=Path('tuner_results/logs'), verbose=True) + + +def get_layer_desc(path: Path) -> List[List[str]]: + with path.open() as f: + return [x.split() for x in f] + + +def get_layer_desc_in_pytorch(layer_desc: List[List[str]]) -> \ + 
Tuple[List[Optional[Module]], Dict[int, int]]: + desc = [] + remapping = {} + for ext_i, vals in enumerate(layer_desc): + if vals and 'conv' == vals[0]: + remapping[ext_i] = len(remapping) + desc.append(HPVMConvBundle) + elif vals and 'dense' == vals[0]: + remapping[ext_i] = len(remapping) + desc.append(Linear) + else: + desc.append(None) + return desc, remapping + + +def read_cost_file(layer_desc: List[List[str]], path: Path) -> List[float]: + with path.open() as f: + raw_costs = [float(x.strip()) for x in f] + costs = [] + raw_cost_it = 0 + for layer in layer_desc: + if 'conv' in layer or 'dense' in layer: + costs.append(raw_costs[raw_cost_it]) + raw_cost_it += 1 + else: + costs.append(0) + assert len(layer_desc) == len(costs) + return costs + + +def read_global_knobs_speedup(path: Path): + knobs_speedup = {} + with path.open() as f: + for x in f: + toks = x.split("\t") + ID = int(toks[0].split(",")[1]) + speedup = float(toks[2]) + knobs_speedup[ID] = speedup + return knobs_speedup + + +class Benchmark: + def __init__(self, json_data: dict): + self.json_data = json_data + self.model_name: str = self.model_name # RHS from json data + # Use baseline configuration as seed to aid the autotuner + # TODO: put this as a field in benchmarks.json + self.use_seed = self.model_name == 'resnet50_imagenet_hpvm' + tensorrt = get_tensorrt_dir() + self.cost_file = tensorrt / self.cost_file + self.layer_file = tensorrt / self.layer_file + self.knobs_config_file = tensorrt / "autotuner/data/global_knobs.txt" + self.batch_dir = tensorrt / self.base_dir / "loss_123" / batch_id + self.result_dir = self.batch_dir / ("dev_tuner" if is_dev_time else "inst_tuner") + + self.layer_desc = get_layer_desc(self.layer_file) + self.pytorch_layer_desc, self.layer_remap = get_layer_desc_in_pytorch(self.layer_desc) + msg_logger.debug(f"HPVM order to neutral order remapping, model {self.model_name}: {self.layer_remap}") + self.layer_costs = read_cost_file(self.layer_desc, self.cost_file) + self.knobs_speedup = read_global_knobs_speedup(get_knob_config_file()) + + def set_batch_id(self, batch_id_: str = batch_id, is_dev_time_: bool = is_dev_time): + tensorrt = get_tensorrt_dir() + self.batch_dir = tensorrt / self.base_dir / "loss_123" / batch_id_ + self.result_dir = self.batch_dir / ("dev_tuner" if is_dev_time_ else "inst_tuner") + + def __getattr__(self, item: str): + return self.json_data[item] + + def translate_config(self, autotuner: ConfigT) -> ConfigT: + ret = {} + for x, v in autotuner.items(): + if x not in self.layer_remap: + assert v == 11 + continue + ret[self.layer_remap[x]] = v + return ret + + def get_baseline_config(self, is_fp16: bool) -> ConfigT: + conf = {} + for layer_id, layer in enumerate(self.pytorch_layer_desc): + knob = 12 if layer is not None and is_fp16 else 11 + conf[layer_id] = knob + return conf + + def pattern_match_layer_knobs(self, module_to_knobs: Dict[Module, List[int]]) -> Dict[int, List[int]]: + conv_knobs = [knobs for m, knobs in module_to_knobs.items() if isinstance(m, HPVMConvBundle)] + linear_knobs = [knobs for m, knobs in module_to_knobs.items() if isinstance(m, Linear)] + assert len(conv_knobs) + len(linear_knobs) == len(module_to_knobs) + conv_knobs_idx, linear_knobs_idx = 0, 0 + ret = {} + for layer_id, module_ty in enumerate(self.pytorch_layer_desc): + if module_ty is HPVMConvBundle: + # PROMISE does not apply to first layer of LeNet. 
+ if self.model_name == "lenet_hpvm" and layer_id == 0: + this_conv_knobs = [x for x in conv_knobs[conv_knobs_idx] if x >= 11] + else: + this_conv_knobs = conv_knobs[conv_knobs_idx] + ret[layer_id] = this_conv_knobs + [11] + conv_knobs_idx += 1 + elif module_ty is Linear: + ret[layer_id] = linear_knobs[linear_knobs_idx] + [11] + linear_knobs_idx += 1 + else: + ret[layer_id] = [11] + assert conv_knobs_idx == len(conv_knobs) + return ret + + def compute_config_cost(self, cfg: ConfigT) -> Tuple[float, float]: + orig_cost = 0.0 + total_cost = 0.0 + for layer, knob in cfg.items(): + op_cost = self.layer_costs[layer] + speedup = self.knobs_speedup[knob] + total_cost += (op_cost * 1.0 / speedup * 1.0) + orig_cost += op_cost + speedup = (orig_cost * 1.0) / (total_cost * 1.0) + return total_cost, speedup + + def get_n_layers(self) -> int: + return len(self.layer_desc) + + +class ConfigMeasurer(BaselineInfo): + def __init__( + self, net: Module, val_loader: DataLoader, test_loader: DataLoader, + non_tensor_output: bool, qos_class: Type[QoS], + nas: NetApproxSelector, bench: Benchmark + ): + super().__init__(net, val_loader, test_loader, non_tensor_output, qos_class) + self.nas = nas + self.bench_translate_config = bench.translate_config + self.layer_remap = {k: v for k, v in enumerate(list(self.nas.net_approxes.keys()))} + msg_logger.debug(f"Neutral order to module scanning order remapping: {self.layer_remap}") + self.bench = bench + msg_logger.info( + f"Model {bench.model_name} baseline accuracy = " + f"{self.val_qos} ({self.test_qos} test)" + ) + + def translate_config(self, autotuner_cfg: ConfigT): + autotuner_cfg = self.bench_translate_config(autotuner_cfg) + # Translate layer index from autotuner format (0, 1, 2...) + # to proxy format (actual layer index) + cfg = {self.layer_remap[k]: v for k, v in autotuner_cfg.items() if v != 11} + return cfg + + @classmethod + def init_from_bench(cls, bench: Benchmark) -> 'ConfigMeasurer': + bi = BaselineInfo.init_by_name(bench.model_name, device) + nas = NetApproxSelector(bi.baseline_net, dev_time_only=is_dev_time, ignore_fp32=not is_dev_time) + return cls( + bi.baseline_net, bi.val_loader, bi.test_loader, + bi.non_tensor_output, bi.qos_class, nas, bench + ) + + def proxy_estimate(self, cfg: ConfigT, proxy: LinearEstimator) -> Tuple[QoS, QoS]: + cfg = self.translate_config(cfg) + mean_acc, confident_acc = proxy.estimate(cfg) + return mean_acc, confident_acc + + def actual_measure( + self, cfg: ConfigT, n_runs: int, is_test_set: bool, threshold: QoS = None + ) -> Tuple[QoS, Optional[float]]: + cfg = self.translate_config(cfg) + approx = self.nas.apply_approx_by_config(cfg).module + dataloader = self.test_loader if is_test_set else self.val_loader + from tqdm import trange + qoses = [] + for _ in trange(n_runs, leave=None): + qoses.append(self.get_qos(approx, dataloader)) + mean, _, confidence = qos_stats(qoses, threshold=threshold) + return mean, confidence + + def get_knobs(self): + # Delaying computing knobs because nas can be modified externally (knobs filtered) + ext_layer_to_knobs = self.bench.pattern_match_layer_knobs(self.nas.get_layer_approxes()) + msg_logger.debug(f"Getting knobs:") + for layer, knobs in ext_layer_to_knobs.items(): + msg_logger.debug(f" {layer}: {knobs}") + return ext_layer_to_knobs + + +class PersistentState(abc.ABC): + def __init__(self): + self._substates: Dict[str, PersistentState] = {} + + def __setattr__(self, name, value): + if isinstance(value, PersistentState): + self._substates[name] = value + super().__setattr__(name, 
value) + + def dump(self): + self._dump_self() + for v in self._substates.values(): + v.dump() + + def load(self): + if self.filled(): + return + try: + self._load_self() + except (ValueError, RuntimeError, FileNotFoundError) as e: + msg_logger.info(f"Exception {e} when loading state") + for k, v in self._substates.items(): + v.load() + + def filled(self): + return self._self_is_initialized() and all((v.filled() for v in self._substates.values())) + + @abc.abstractmethod + def _dump_self(self): + pass + + @abc.abstractmethod + def _load_self(self): + pass + + @abc.abstractmethod + def _self_is_initialized(self) -> bool: + pass + + +class PersistentConfigs(PersistentState): + def __init__(self, bench: Benchmark, prefix: str, baseline_acc: QoS, rt_cpu: bool, rt_gpu: bool): + super().__init__() + self._data = [] + self._filled = False + self.bench = bench + self.prefix = prefix + self.baseline_qos = baseline_acc + self.rt_cpu_path = self.bench.result_dir / f"{prefix}_cpu.txt" if rt_cpu else None + self.rt_gpu_path = self.bench.result_dir / f"{prefix}_fp16.txt" if rt_gpu else None + + @property + def config_folder(self) -> Path: + return self.bench.result_dir / self.prefix + + @property + def configs(self) -> List[Config]: + return self._data + + def _load_self(self): + # Try reading autotuner configs and hpvm-rt configs + self._data = load_configs_from_dir(self.config_folder, self.baseline_qos) + # If hpvm-rt is not present, dump it. + # TODO: check rt format integrity + if ( + (self.rt_cpu_path and not self.rt_cpu_path.is_file()) or + (self.rt_cpu_path and not self.rt_cpu_path.is_file()) + ): + self.finalize_dump() + self._filled = True + + def _dump_self(self): + for conf in self._data: + self._dump_one(conf) + self.finalize_dump() + + def _self_is_initialized(self) -> bool: + return self._filled + + def _dump_one(self, config: Config): + if not self.config_folder.is_dir(): + os.mkdir(self.config_folder.as_posix()) + config_path = self.config_folder / config.fname + with config_path.open('w') as f: + f.write(config.to_tuner_format()) + + def append(self, config: Config): + self._data.append(config) + self._dump_one(config) + + def extend(self, configs: Iterable[Config]): + confs = [] + for conf in configs: + self._dump_one(conf) + confs.append(conf) + self._data.extend(confs) + + def finalize_dump(self, with_configs: Iterable[Config] = None): + if with_configs is not None: + self.extend(with_configs) + self._filled = True + dump_rt_format_to( + self.bench.layer_desc, self._data, self.baseline_qos, + self.rt_cpu_path, self.rt_gpu_path + ) + + +class TuningTime(PersistentState): + def __init__(self, path: Path): + super().__init__() + self.timers = {} + self.path = path + + def _load_self(self): + import re + with self.path.open() as f: + lines = f.readlines() + for line in lines: + line = line.strip() + if not line: + continue + match = re.match(r'Timer ([^=]+) = ([0-9.]+) hours', line) + if not match: + raise RuntimeError(f"File {self.path} malformed") + self.timers[match.group(1)] = float(match.group(2)) + + def _dump_self(self): + for k, v in self.timers.items(): + self._dump_one(k, v) + + def _self_is_initialized(self) -> bool: + return bool(self.timers) + + def _dump_one(self, key: str, value: float): + time_hrs = value / (60 * 60) + msg_logger.info(f"Timer {key} = {time_hrs:.3f} hours") + with self.path.open('a') as f: + f.write(f"Timer {key} = {time_hrs} hours\n") + + def add_timer(self, key: str, value: float): + self.timers[key] = value + self._dump_one(key, value) + + +class 
AccPair(PersistentState): + def __init__(self, path: Path, qos_class: Type[QoS]): + super().__init__() + self.path = path + self.qos_class = qos_class + self._data = None + + @property + def accs(self) -> Tuple[QoS, QoS]: + if self._data is None: + raise AttributeError("Accuracy not init'ed yet") + return self._data + + @accs.setter + def accs(self, value: Tuple[QoS, QoS]): + self._data = value + self._dump_self() + + def _load_self(self): + with self.path.open() as f: + acc_val, acc_test = [self.qos_class.parse(s) for s in f.read().split('\n')] + self._data = acc_val, acc_test + + def _dump_self(self): + with self.path.open('w') as f: + f.write(f"{self._data[0]}\n{self._data[1]}") + + def _self_is_initialized(self) -> bool: + return self._data is not None + + +class ExpState(PersistentState): + def __init__(self, bench: Benchmark, qos_class: Type[QoS], accs: Tuple[QoS, QoS] = None): + super().__init__() + self.bench = bench + self.baseline_accs = AccPair(bench.result_dir / 'baseline_acc.txt', qos_class) + self.baseline_accs.load() + if not self.baseline_accs.filled(): + if accs is None: + raise ValueError("Provide model baseline accuracy") + self.baseline_accs.accs = accs + acc_val, acc_test = self.baseline_accs.accs + self.all_configs = PersistentConfigs(bench, 'all', acc_val, False, False) + self.filtered_configs = PersistentConfigs(bench, 'filtered', acc_val, False, False) + self.validated_configs = PersistentConfigs(bench, 'validated', acc_val, False, False) + self.tested_configs = PersistentConfigs(bench, 'tested', acc_test, False, False) + self.valid_configs = PersistentConfigs(bench, 'valid', acc_val, True, True) + self.test_configs = PersistentConfigs(bench, 'test', acc_test, True, True) + self.timers = TuningTime(bench.result_dir / 'tuning_time.txt') + super().load() + + def _load_self(self): + pass + + def _dump_self(self): + pass + + def _self_is_initialized(self) -> bool: + return True + + def finalize_plot(self): + if not self.filled(): + raise RuntimeError("Cannot finalize before data slots are all filled") + plot_configs( + self.bench.result_dir / "all_plot.png", + all=self.all_configs.configs + ) + plot_configs( + self.bench.result_dir / "validated_tested_plot.png", + filtered=self.filtered_configs.configs, + validated=self.validated_configs.configs, + tested=self.tested_configs.configs + ) + plot_configs( + self.bench.result_dir / "filtered_plot.png", + valid=self.valid_configs.configs, + test=self.test_configs.configs + ) + + +with (Path(__file__).parent / 'utils/benchmarks.json').open() as f_: + benchmark_data = json.load(f_) +bench_tuner_data = {k: Benchmark(v) for k, v in benchmark_data.items()} diff --git a/llvm/projects/pred_tuner/model_params b/llvm/projects/pred_tuner/model_params new file mode 120000 index 0000000000000000000000000000000000000000..90aaa403fdbec5110e1c02431a7df3f31fed0dbf --- /dev/null +++ b/llvm/projects/pred_tuner/model_params @@ -0,0 +1 @@ +../hpvm-tensor-rt/model_params \ No newline at end of file diff --git a/llvm/projects/pred_tuner/models/__init__.py b/llvm/projects/pred_tuner/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..192f4b5bea17503603ba8f1208a22cea78af2897 --- /dev/null +++ b/llvm/projects/pred_tuner/models/__init__.py @@ -0,0 +1,3 @@ +from .networks import networks +from .inference import get_all_output, move_to_device_recursively, BaselineInfo +from .domains import QoS diff --git a/llvm/projects/pred_tuner/models/datasets/__init__.py b/llvm/projects/pred_tuner/models/datasets/__init__.py new 
file mode 100644 index 0000000000000000000000000000000000000000..1a1e35fcea0e29482abbace082f825aac6c8d608 --- /dev/null +++ b/llvm/projects/pred_tuner/models/datasets/__init__.py @@ -0,0 +1,2 @@ +from .hpvm import CIFAR, CIFARImage, HPVMDataset, ImageNet, MNIST +from .torch import get_cifar10_test_dataset, get_cifar10_test_dataloader, get_cifar10_train_dataloader diff --git a/llvm/projects/pred_tuner/models/datasets/hpvm.py b/llvm/projects/pred_tuner/models/datasets/hpvm.py new file mode 100644 index 0000000000000000000000000000000000000000..aa871d89d85493a0c8ad1237ed9e5e8b0b34ac49 --- /dev/null +++ b/llvm/projects/pred_tuner/models/datasets/hpvm.py @@ -0,0 +1,163 @@ +import logging +from pathlib import Path +from typing import Iterator, List, Tuple, TypeVar + +import numpy as np +import torch +from torch.utils.data.dataset import IterableDataset + +from models.hpvm import read_tensor_from_file + +RetT = Tuple[torch.Tensor, torch.Tensor] +T = TypeVar('T', bound='HPVMDataset') +msg_logger = logging.getLogger() + + +class HPVMDataset(IterableDataset): + def __init__(self, inputs: torch.Tensor, outputs: torch.Tensor): + self.inputs, self.outputs = inputs, outputs + + @classmethod + def from_file(cls, *args, **kwargs): + pass + + @property + def sample_input(self): + inputs, outputs = next(iter(self)) + return inputs + + def __len__(self) -> int: + return len(self.inputs) + + def __getitem__(self, idx) -> RetT: + if idx >= len(self): + raise IndexError("Dataset index out of range") + return self.inputs[idx], self.outputs[idx] + + def __iter__(self) -> Iterator[RetT]: + for i in range(len(self)): + yield self[i] + + +class HPVMDNNDataset(HPVMDataset): + @classmethod + def _from_file( + cls, input_file: Path, labels_file: Path, is_uint8_label: bool, + count: int, offset: int, *item_shapes: int + ): + # NOTE: assuming (N, *) ordering of inputs (such as NCHW, NHWC) + channel_size = np.prod(np.array(item_shapes)) + if count != -1: + count *= channel_size + offset *= channel_size + inputs = read_tensor_from_file( + input_file, -1, *item_shapes, count=count, offset=offset, + use_progress_bar=True + ) + label_read_ty = np.int8 if is_uint8_label else np.int32 + labels = read_tensor_from_file( + labels_file, -1, read_ty=label_read_ty, cast_ty=np.long, + count=count, offset=offset + ) + if inputs.size(0) != labels.size(0): + raise ValueError("Input and output have different number of data points") + msg_logger.info(f"{inputs.shape[0]} entries loaded from dataset.") + return cls(inputs, labels) + + @classmethod + def from_default_file(cls, prefix: str): + prefix = Path(prefix) + return cls.from_file( + Path(prefix) / 'input.bin', Path(prefix) / 'labels.bin' + ) + + +class MNIST(HPVMDNNDataset): + @classmethod + def from_file( + cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 + ): + return cls._from_file( + input_file, labels_file, True, count, offset, 1, 28, 28 + ) + + +class CIFAR(HPVMDNNDataset): + @classmethod + def from_file( + cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 + ): + return cls._from_file( + input_file, labels_file, True, count, offset, 3, 32, 32 + ) + + +class ImageNet(HPVMDNNDataset): + @classmethod + def from_file( + cls, input_file: Path, labels_file: Path, count: int = -1, offset: int = 0 + ): + return cls._from_file( + input_file, labels_file, False, count, offset, 3, 224, 224 + ) + + +class HPVMImageDataset(HPVMDataset): + @classmethod + def _from_file( + cls, input_file: Path, output_file: Path, + count: int, offset: int, 
input_shape: List[int], output_shape: List[int] + ): + # NOTE: assuming (N, *) ordering of inputs (such as NCHW, NHWC) + channel_size = np.prod(np.array(input_shape)) + if count != -1: + count *= channel_size + offset *= channel_size + inputs = read_tensor_from_file( + input_file, -1, *input_shape, count=count, offset=offset, + use_progress_bar=True + ) + outputs = read_tensor_from_file( + output_file, -1, *output_shape, count=count, offset=offset, + use_progress_bar=True + ) + print(f"(input={inputs.shape[0]}, output={outputs.shape[0]}) entries loaded from dataset.") + return cls(inputs, outputs) + + @classmethod + def from_default_file(cls, prefix: str): + prefix = Path(prefix) + return cls.from_file( + Path(prefix) / 'input.bin', Path(prefix) / 'canny_input.bin', + Path(prefix) / 'labels.bin', Path(prefix) / 'output.bin' + ) + + +class CIFARImage(HPVMImageDataset): + def __init__( + self, inputs: torch.Tensor, outputs: torch.Tensor, cifar: CIFAR + ): + super().__init__(inputs, outputs) + self.cifar = cifar + + @classmethod + def from_file( + cls, dnn_input_file: Path, image_input_file: Path, + labels_file: Path, output_file: Path, + batch_size: int = 100, count: int = -1, offset: int = 0 + ): + classifier = CIFAR.from_file(dnn_input_file, labels_file) + dataset = HPVMImageDataset._from_file( + image_input_file, output_file, count, offset, + [3, 128, 128], [1, 128, 128] + ) + return cls(dataset.inputs, dataset.outputs, classifier) + + def sample(self: 'CIFARImage', ratio: float) -> 'CIFARImage': + raise NotImplementedError() + + def __getitem__(self, idx): + if idx >= len(self): + raise IndexError("Dataset index out of range") + cifar_in, cifar_out = self.cifar[idx] + return (cifar_in, self.inputs[idx]), (cifar_out, self.outputs[idx]) diff --git a/llvm/projects/pred_tuner/models/datasets/torch.py b/llvm/projects/pred_tuner/models/datasets/torch.py new file mode 100644 index 0000000000000000000000000000000000000000..1b07bd17c744df733158dc5d84da3f1934e7cd3c --- /dev/null +++ b/llvm/projects/pred_tuner/models/datasets/torch.py @@ -0,0 +1,37 @@ +import logging + +from torch.utils.data import DataLoader +from torchvision.datasets import CIFAR10 +from torchvision.transforms import transforms + +msg_logger = logging.getLogger() + + +def get_cifar10_train_dataloader(root: str, batchsize: int) -> DataLoader: + transform_train = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + dl = DataLoader( + CIFAR10(root=root, train=True, download=True, transform=transform_train), + batch_size=batchsize, shuffle=True + ) + msg_logger.info(f"{len(dl)} entries loaded from training dataset.") + return dl + + +def get_cifar10_test_dataset(root: str) -> CIFAR10: + transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + dataset = CIFAR10(root=root, train=False, download=True, transform=transform_test) + msg_logger.info(f"{len(dataset)} entries loaded from training dataset.") + return dataset + + +def get_cifar10_test_dataloader(root: str, batchsize: int) -> DataLoader: + dl = DataLoader(get_cifar10_test_dataset(root), batch_size=batchsize) + return dl diff --git a/llvm/projects/pred_tuner/models/domains/__init__.py b/llvm/projects/pred_tuner/models/domains/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..abe6c13a378fe61f9dee7b1c7a60950c1a58226a --- /dev/null +++ b/llvm/projects/pred_tuner/models/domains/__init__.py @@ -0,0 +1 @@ +from .qoses import QoS, Accuracy, qos_stats diff --git a/llvm/projects/pred_tuner/models/domains/qoses.py b/llvm/projects/pred_tuner/models/domains/qoses.py new file mode 100644 index 0000000000000000000000000000000000000000..0a1e7f2eb1050f5adcc4e25d7b65100e3141ae8a --- /dev/null +++ b/llvm/projects/pred_tuner/models/domains/qoses.py @@ -0,0 +1,317 @@ +import abc +from typing import Iterable, List, Optional, Tuple + +import numpy as np +import torch +from torch.utils.data import DataLoader + + +class QoS(abc.ABC): + @abc.abstractmethod + def __sub__(self, other: 'QoS') -> 'QoS': + pass + + @abc.abstractmethod + def __add__(self, other: 'QoS') -> 'QoS': + pass + + @abc.abstractmethod + def __truediv__(self, other: float) -> 'QoS': + pass + + @abc.abstractmethod + def __lt__(self, other: 'QoS') -> bool: + pass + + @abc.abstractmethod + def __eq__(self, other: 'QoS') -> bool: + pass + + def __gt__(self, other: 'QoS') -> bool: + return not self <= other + + def __le__(self, other: 'QoS') -> bool: + return self < other or self == other + + def __ge__(self, other: 'QoS') -> bool: + return not self < other + + @abc.abstractmethod + def __hash__(self): + pass + + @abc.abstractmethod + def __repr__(self) -> str: + pass + + @abc.abstractmethod + def to_scalar(self, relative_to=None) -> float: + pass + + @abc.abstractmethod + def numpy(self) -> np.ndarray: + pass + + @abc.abstractmethod + def null(self) -> 'QoS': + pass + + @staticmethod + @abc.abstractmethod + def parse(string: str) -> 'QoS': + pass + + @abc.abstractmethod + def min_positive_loss(self) -> 'QoS': + pass + + @staticmethod + @abc.abstractmethod + def suggested_tuner_thresholds(baseline: 'QoS') -> List['QoS']: + pass + + @staticmethod + @abc.abstractmethod + def suggested_val_threshold(baseline: 'QoS') -> 'QoS': + pass + + @staticmethod + @abc.abstractmethod + def suggested_test_threshold(baseline: 'QoS') -> 'QoS': + pass + + @staticmethod + @abc.abstractmethod + def from_output(output, ground_truth) -> 'QoS': + pass + + @classmethod + def combine_qoses(cls, qoses: Iterable['QoS']) -> 'QoS': + qoses = np.array(qoses) + return qoses.mean() + + @classmethod + def from_all_output(cls, outputs: List, dataloader: DataLoader) -> 'QoS': + if not outputs: + raise ValueError("Empty output has no QoS value") # Probably can result cls.null() + qoses = [] + for (_, gt_output), output in zip(dataloader, outputs): + qoses.append(cls.from_output(output, gt_output)) + return cls.combine_qoses(qoses) + + +class ScalarQoS(QoS, abc.ABC): + def __init__(self, value: float): + self.value = value + + def __sub__(self, other: 'ScalarQoS') -> 'ScalarQoS': + return self.__class__(self.value - other.value) + + def __add__(self, other: 'ScalarQoS') -> 'ScalarQoS': + return self.__class__(self.value + other.value) + + def __truediv__(self, other: float): + return self.__class__(self.value / other) + + def __lt__(self, other: 'ScalarQoS') -> bool: + return self.value < other.value + + def __eq__(self, other: 'ScalarQoS') -> bool: + return self.value == other.value + + def __hash__(self): + return hash(self.value) + + def __repr__(self) -> str: + return repr(self.value) + + def null(self) -> 'ScalarQoS': + return self.__class__(0.0) + + def to_scalar(self, relative_to=None) -> float: + return self.value + + def numpy(self) -> np.ndarray: + return np.array([self.value]) + + @classmethod + def 
parse(cls, string: str) -> 'ScalarQoS': + return cls(float(string)) + + +class Accuracy(ScalarQoS): + def __init__(self, accuracy: float): + super().__init__(accuracy) + + def min_positive_loss(self) -> 'Accuracy': + return Accuracy(0.05) if self.value < 0 else self + + @staticmethod + def suggested_tuner_thresholds(baseline: 'Accuracy') -> List['Accuracy']: + return [baseline - Accuracy(0.8), baseline - Accuracy(1.5), baseline - Accuracy(2.1)] + + @staticmethod + def suggested_val_threshold(baseline: 'Accuracy') -> 'Accuracy': + return baseline - Accuracy(2.1) + + @staticmethod + def suggested_test_threshold(baseline: 'Accuracy') -> 'Accuracy': + return baseline - Accuracy(3.0) + + @staticmethod + def from_output(output: torch.Tensor, ground_truth: torch.Tensor) -> 'Accuracy': + ground_truth = ground_truth.to(output.device) + correct = output.argmax(dim=1).eq(ground_truth).sum().item() + acc = correct / ground_truth.shape[0] + return Accuracy(acc * 100) + + +class PSNR(ScalarQoS): + artificial_max = 100 + + def __init__(self, psnr: float): + super().__init__(psnr) + + def min_positive_loss(self) -> 'PSNR': + return PSNR(1) if self.value < 0 else self + + @staticmethod + def suggested_tuner_thresholds(baseline: 'PSNR') -> List['PSNR']: + return [PSNR(30), PSNR(25), PSNR(20)] + + @staticmethod + def suggested_val_threshold(baseline: 'PSNR') -> 'PSNR': + return PSNR(20) + + @staticmethod + def suggested_test_threshold(baseline: 'PSNR') -> 'PSNR': + return PSNR(20) + + @staticmethod + def from_output(output: torch.Tensor, ground_truth: torch.Tensor) -> 'PSNR': + ground_truth = ground_truth.to(output.device) + if ground_truth.shape[0] != 0: + max_i = ground_truth.max() + mse = torch.sum((output - ground_truth) ** 2) / output.nelement() + psnr = (20 * torch.log10(max_i) - 10 * torch.log10(mse)).item() + else: + psnr = PSNR.artificial_max + return PSNR(psnr) + + +class MultiQoS(QoS, abc.ABC): + def __init__(self, *qoses: ScalarQoS): + self.qoses = qoses + + def __sub__(self, other: 'MultiQoS') -> 'MultiQoS': + assert type(self) == type(other) + return self.__class__(*(x - y for x, y in zip(self.qoses, other.qoses))) + + def __add__(self, other: 'MultiQoS') -> 'MultiQoS': + assert type(self) == type(other) + return self.__class__(*(x + y for x, y in zip(self.qoses, other.qoses))) + + def __truediv__(self, other: int): + return self.__class__(*(x / other for x in self.qoses)) + + def __lt__(self, other: 'MultiQoS') -> bool: + assert type(self) == type(other) + return all((x < y for x, y in zip(self.qoses, other.qoses))) + + def __eq__(self, other: 'MultiQoS') -> bool: + assert type(self) == type(other) + return all((x == y for x, y in zip(self.qoses, other.qoses))) + + def __hash__(self): + return hash(self.qoses) + + def __repr__(self) -> str: + return ','.join(repr(q) for q in self.qoses) + + def null(self) -> 'MultiQoS': + return MultiQoS(*(q.null() for q in self.qoses)) + + def numpy(self) -> np.ndarray: + return np.array([q.to_scalar() for q in self.qoses]) + + def min_positive_loss(self) -> 'MultiQoS': + return self.__class__(*(q.min_positive_loss() for q in self.qoses)) + + +PairT = Tuple[torch.Tensor, torch.Tensor] +TripleT = Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + + +class AccuracyPSNR(MultiQoS): + def __init__(self, acc: Accuracy, psnr: PSNR): + super().__init__(acc, psnr) + + def to_scalar(self, relative_to: 'AccuracyPSNR' = None) -> float: + acc, psnr = self.qoses + if relative_to is not None: + thres_acc, thres_psnr = relative_to.qoses + punishment = (-1 if acc < thres_acc 
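# This scalarization collapses the (accuracy, PSNR) pair into a single tuner
# objective: each metric is normalized into [0, 1] (accuracy divided by 100,
# PSNR capped at artificial_max) and the two are averaged; in addition, each
# threshold in `relative_to` that is violated subtracts 1, pushing configs
# that miss a threshold below those that satisfy both.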
else 0) + (-1 if psnr < thres_psnr else 0) + else: + punishment = 0 + max_psnr = PSNR.artificial_max + normed_psnr = min(psnr.value, max_psnr) / max_psnr # [0, 1], higher better + acc = acc.value / 100 # [0, 1], higher better + combined = (acc + normed_psnr) / 2 # [0, 1], higher better + assert 0 <= combined <= 1 + return combined + punishment + + @staticmethod + def parse(string: str) -> 'AccuracyPSNR': + acc, psnr = string.split(',') + return AccuracyPSNR(Accuracy.parse(acc), PSNR.parse(psnr)) + + # noinspection PyTypeChecker + @staticmethod + def suggested_tuner_thresholds(baseline: 'AccuracyPSNR') -> List['AccuracyPSNR']: + ret = [] + for acc in Accuracy.suggested_tuner_thresholds(baseline.qoses[0]): + for psnr in PSNR.suggested_tuner_thresholds(baseline.qoses[1]): + ret.append(AccuracyPSNR(acc, psnr)) + return ret + + # noinspection PyTypeChecker + @staticmethod + def suggested_val_threshold(baseline: 'AccuracyPSNR') -> 'AccuracyPSNR': + return AccuracyPSNR( + Accuracy.suggested_val_threshold(baseline.qoses[0]), + PSNR.suggested_val_threshold(baseline.qoses[1]) + ) + + # noinspection PyTypeChecker + @staticmethod + def suggested_test_threshold(baseline: 'AccuracyPSNR') -> 'AccuracyPSNR': + return AccuracyPSNR( + Accuracy.suggested_test_threshold(baseline.qoses[0]), + PSNR.suggested_test_threshold(baseline.qoses[1]) + ) + + @staticmethod + def from_output(output: TripleT, ground_truth: PairT) -> 'AccuracyPSNR': + gt_labels, gt_images = ground_truth + labels, image_selection, images = output + gt_labels = gt_labels.to(labels.device) + gt_images = gt_images.to(images.device) + acc = Accuracy.from_output(labels, gt_labels) + gt_images = gt_images[image_selection] + psnr = PSNR.from_output(images, gt_images) + return AccuracyPSNR(acc, psnr) + + +def qos_stats(qoses: List[QoS], confidence: float = None, threshold: QoS = None) -> \ + Tuple[QoS, Optional[QoS], Optional[float]]: + qoses = np.array(qoses) + n_runs = len(qoses) + confidence_at_thres = np.count_nonzero(qoses > threshold) / n_runs if threshold else None + if confidence is None: + qos_at_confidence = None + else: + index = int((1 - confidence) * n_runs) + # Otherwise it's np.float64 and causes trouble with opentuner + qos_at_confidence = qoses[index] + mean_acc = qoses.mean() + return mean_acc, qos_at_confidence, confidence_at_thres diff --git a/llvm/projects/pred_tuner/models/hpvm/__init__.py b/llvm/projects/pred_tuner/models/hpvm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..337738c0bf41002f910acfb98b9e8073ebc10052 --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/__init__.py @@ -0,0 +1,7 @@ +from .alexnet import AlexNet, AlexNet2, AlexNetImageNet +from .alexnet_canny import AlexNet2Canny +from .layers import HPVMConvBundle, HPVMDNN, HPVMDefaultModule, read_tensor_from_file +from .lenet import LeNet +from .mobilenet import MobileNet +from .resnet import ResNet18, ResNet50 +from .vgg16 import VGG16Cifar10, VGG16Cifar100, VGG16ImageNet diff --git a/llvm/projects/pred_tuner/models/hpvm/alexnet.py b/llvm/projects/pred_tuner/models/hpvm/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b7c9b6c3cae1e86ac699913b3f1d09af28c52705 --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/alexnet.py @@ -0,0 +1,49 @@ +from torch.nn import Linear, ReLU, Sequential, Tanh + +from .layers import HPVMConvBundle, HPVMDNN + + +class AlexNet(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(3, 64, 11, Tanh, pool_size=2, padding=5), + HPVMConvBundle(64, 192, 
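# HPVMConvBundle (defined in models/hpvm/layers.py later in this patch) wraps
# Conv2d -> optional MaxPool2d -> optional activation; the positional
# arguments in these calls are (in_channels, out_channels, kernel_size,
# activation), with pooling and the remaining Conv2d options (padding,
# stride, bias) given as keywords.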
5, Tanh, pool_size=2, padding=2), + HPVMConvBundle(192, 384, 3, Tanh, padding=1), + HPVMConvBundle(384, 256, 3, Tanh, padding=1), + HPVMConvBundle(256, 256, 3, Tanh, pool_size=2, padding=1) + ) + linears = Sequential(Linear(4096, 10)) + super().__init__(convs, linears) + + +class AlexNet2(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(3, 32, 3, Tanh, padding=1), + HPVMConvBundle(32, 32, 3, Tanh, pool_size=2, padding=1), + HPVMConvBundle(32, 64, 3, Tanh, padding=1), + HPVMConvBundle(64, 64, 3, Tanh, pool_size=2, padding=1), + HPVMConvBundle(64, 128, 3, Tanh, padding=1), + HPVMConvBundle(128, 128, 3, Tanh, pool_size=2, padding=1) + ) + linears = Sequential(Linear(2048, 10)) + super().__init__(convs, linears) + + +class AlexNetImageNet(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(3, 64, 11, ReLU, padding=2, stride=4, pool_size=3, pool_stride=2), + HPVMConvBundle(64, 192, 5, ReLU, padding=2, pool_size=3, pool_stride=2), + HPVMConvBundle(192, 384, 3, ReLU, padding=1), + HPVMConvBundle(384, 256, 3, ReLU, padding=1), + HPVMConvBundle(256, 256, 3, ReLU, padding=1, pool_size=3, pool_stride=2) + ) + linears = Sequential( + Linear(9216, 4096), + ReLU(), + Linear(4096, 4096), + ReLU(), + Linear(4096, 1000), + ) + super().__init__(convs, linears) diff --git a/llvm/projects/pred_tuner/models/hpvm/alexnet_canny.py b/llvm/projects/pred_tuner/models/hpvm/alexnet_canny.py new file mode 100644 index 0000000000000000000000000000000000000000..5e610279121a5b368f4cdf64b72e0a2d6fe9289a --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/alexnet_canny.py @@ -0,0 +1,48 @@ +from typing import Iterable, Tuple + +import torch +from torch.nn import Softmax + +from .alexnet import AlexNet2 +from .layers import HPVMConvBundle, HPVMDefaultModule, ReduceKind, TensorReduce + + +class AlexNet2Canny(HPVMDefaultModule): + def __init__(self, on_classes: Iterable[int]): + super().__init__() + prototype = AlexNet2() + self.on_classes = list(on_classes) + self.convs = prototype.convs + self.linears = prototype.linears + self.softmax = Softmax(1) + self.reduce_1 = TensorReduce(1, ReduceKind.sum) + self.gaussian = HPVMConvBundle(1, 1, 5, padding=2, bias=False) + self.sobel_x = HPVMConvBundle(1, 1, 3, padding=1, bias=False) + self.sobel_y = HPVMConvBundle(1, 1, 3, padding=1, bias=False) + self.reduce_2 = TensorReduce(2, ReduceKind.max) + self.reduce_3 = TensorReduce(2, ReduceKind.max) + + def canny(self, images: torch.Tensor) -> torch.Tensor: + assert len(images.shape) == 4 # Assuming NCHW + grayscale = self.reduce_1(images) + grayscale = grayscale.unsqueeze(1) + denoised = self.gaussian(grayscale) + grad_x = self.sobel_x(denoised) + grad_y = self.sobel_y(denoised) + grad_mag = torch.sqrt(grad_x ** 2 + grad_y ** 2) + grad_max_1D = self.reduce_2(grad_mag) + grad_max = self.reduce_3(grad_max_1D) + grad_max = grad_max.unsqueeze(2).unsqueeze(3) + grad_mag_norm = grad_mag / grad_max + return grad_mag_norm + + def forward(self, inputs) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + from functools import reduce + from operator import ior + dnn_input, canny_input = inputs + conv_outputs = self.convs(dnn_input) + dnn_outputs = self.softmax(self.linears(conv_outputs.view(conv_outputs.shape[0], -1))) + classes = dnn_outputs.argmax(dim=1) + selection = reduce(ior, (classes == i for i in self.on_classes)) + selected_inputs = canny_input[selection] + return dnn_outputs, selection, self.canny(selected_inputs) diff --git a/llvm/projects/pred_tuner/models/hpvm/layers.py 
b/llvm/projects/pred_tuner/models/hpvm/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..fed66e7b1507ac4ca309de0dc0599dde9a926a8a --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/layers.py @@ -0,0 +1,223 @@ +from enum import Enum +from pathlib import Path +from typing import Callable, Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +from torch.nn import AvgPool2d, BatchNorm2d, Conv2d, Linear, MaxPool2d, Module, Parameter, ReLU, Sequential, Softmax, \ + Tanh + + +def rsetattr(obj, attr, val): + pre, _, post = attr.rpartition('.') + return setattr(rgetattr(obj, pre) if pre else obj, post, val) + + +def rgetattr(obj, attr, *args): + def _getattr(obj_, attr_): + return getattr(obj_, attr_, *args) + + import functools + return functools.reduce(_getattr, attr.split('.'), obj) + + +def read_tensor_from_file( + filename: Union[str, Path], *shape: int, + read_ty=np.float32, cast_ty=np.float32, + count: int = -1, offset: int = 0, + use_progress_bar: bool = False +) -> torch.Tensor: + from tqdm import trange + block_size = 102400 + offset = offset * read_ty().itemsize + mmap = np.memmap(filename, dtype=read_ty, mode='r', offset=offset) + raw = np.empty_like(mmap) + n_entries = min(mmap.shape[0], count) if count != -1 else mmap.shape[0] + n_blocks = int(np.ceil(n_entries / block_size)) + iterable = trange(n_blocks) if use_progress_bar else range(n_blocks) + for block in iterable: + l, r = block * block_size, min(n_entries, (block + 1) * block_size) + raw[l:r] = mmap[l:r] + del mmap + if cast_ty != read_ty: + raw = raw.astype(cast_ty) + loaded_np = raw.reshape(shape) + return torch.from_numpy(loaded_np) + + +ActivT = Optional[Callable[[], Module]] +ArgsT = Union[List, Dict] +RangeT = Tuple[float, float] +RangeOT = Optional[RangeT] + + +class HPVMConvBundle(Module): + def __init__( + self, in_channels: int, out_channels: int, kernel_size: int, + activation: ActivT = None, + pool_size: Optional[int] = None, pool_stride: Optional[int] = None, + **conv_kwargs + ): + super().__init__() + self.conv = Conv2d(in_channels, out_channels, kernel_size, **conv_kwargs) + if pool_size is None: + self.pooling = Sequential() + else: + pool_stride = pool_stride or pool_size + self.pooling = MaxPool2d(pool_size, stride=pool_stride) + self.activation = Sequential() if activation is None else activation() + self.conv_ranges_ = None + + def forward(self, input_: torch.Tensor) -> torch.Tensor: + return self.activation(self.pooling(self.conv(input_))) + + def input_to_conv(self, input_: torch.Tensor) -> torch.Tensor: + bias = self.conv.bias + self.conv.bias = None + conv_out = self.conv(input_) + self.conv.bias = bias + return conv_out + + def conv_to_output(self, conv_output: torch.Tensor) -> torch.Tensor: + if self.conv.bias is not None: + broadcast_bias = self.conv.bias.reshape(1, -1, 1, 1) + return self.activation(self.pooling(conv_output + broadcast_bias)) + else: + return self.activation(self.pooling(conv_output)) + + def __getattr__(self, item): + if item in ('weight', 'bias'): + return getattr(self.conv, item) + return super(HPVMConvBundle, self).__getattr__(item) + + def __setattr__(self, key, value): + if key in ('weight', 'bias'): + setattr(self.conv, key, value) + else: + super(HPVMConvBundle, self).__setattr__(key, value) + + +class ReduceKind(Enum): + sum = 1 + max = 2 + + +class TensorReduce(Module): + def __init__(self, dim: int, kind: ReduceKind, skip_ratio: float = 0.0): + super().__init__() + self.dim = dim + self.skip_ratio = skip_ratio + if kind 
== ReduceKind.sum: + self.reducer = lambda x: x.sum(dim=0) # Because we transpose the input + self.normalizer = lambda x: x / (1 - self.skip_ratio) + elif kind == ReduceKind.max: + self.reducer = lambda x: x.max(dim=0)[0] + self.normalizer = lambda x: x + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + from math import ceil + inputs_t = inputs.transpose(0, self.dim) + if len(inputs) == 0: + dim_reduced = torch.zeros_like(inputs_t)[0] + else: + reduce_dim_size = inputs_t.size(0) + approxed_dim_size = int(ceil((1 - self.skip_ratio) * reduce_dim_size)) + # Take a contiguous chunk and reduce over it, ignore the rest + dim_reduced: torch.Tensor = self.normalizer(self.reducer(inputs_t[:approxed_dim_size])) + return dim_reduced.unsqueeze(0).transpose(0, self.dim).squeeze(self.dim) + + def change_skip_ratio(self, skip_ratio: float) -> 'TensorReduce': + return TensorReduce(self.dim, self.kind, skip_ratio) + + +def read_quant_ranges(prefix: Path): + range_file = prefix / 'quant_ranges.txt' + if not range_file.is_file(): + return None + with range_file.open() as f: + return [[float(field) for field in line.strip().split()] for line in f.readlines()] + + +class HPVMDefaultModule(Module): + @staticmethod + def load_into_layer( + layer: Module, attr_name: str, filename: str, prefix: Path, + is_linear_weight: bool = False + ): + tensor = rgetattr(layer, attr_name) + if is_linear_weight: + n_out, n_in = tensor.shape + loaded = read_tensor_from_file(prefix / filename, n_in, n_out).T + else: + loaded = read_tensor_from_file(prefix / filename, *tensor.shape) + if type(tensor) is Parameter: + loaded = Parameter(loaded, requires_grad=True) + rsetattr(layer, attr_name, loaded) + + @staticmethod + def install_quant_range(module: Module, values: List[float]): + in_min, in_max, w_min, w_max, b_min, b_max, out_min, out_max = values + module.conv_ranges = (in_min, in_max), (w_min, w_max), (b_min, b_max), (out_min, out_max) + + def default_load_hpvm_weights(self, prefix: str): + # TODO: this is probably better done with help of ModuleDAG + prefix = Path(prefix) + convs, group_convs, linears, bns = [], [], [], [] + weightless_types = AvgPool2d, MaxPool2d, ReLU, Tanh, Softmax, TensorReduce + container_types = (Sequential,) + for module in self.modules(): + if isinstance(module, HPVMConvBundle): + convs.append(module) + elif isinstance(module, Conv2d): + if module.groups != 1: + group_convs.append(module) + elif isinstance(module, Linear): + linears.append(module) + elif isinstance(module, BatchNorm2d): + bns.append(module) + elif type(module) in weightless_types: + pass + elif type(module) in container_types or len(list(module.children())) != 0: + continue + else: + raise RuntimeError(f"Layer type {type(module)} not understood") + load = self.load_into_layer + quant_ranges = read_quant_ranges(prefix) + quant_ranges_idx = 0 + for i, conv in enumerate(convs): + conv: HPVMConvBundle + load(conv, 'weight', f"conv2d_{i + 1}_w.bin", prefix) + if conv.bias is not None: + load(conv, 'bias', f"conv2d_{i + 1}_b.bin", prefix) + if quant_ranges is not None: + self.install_quant_range(conv, quant_ranges[quant_ranges_idx]) + quant_ranges_idx += 1 + for i, gconv in enumerate(group_convs): + load(gconv, 'weight', f"depthwise_conv2d_{i + 1}_w.bin", prefix) + if gconv.bias is not None: + load(gconv, 'bias', f"depthwise_conv2d_{i + 1}_b.bin", prefix) + for i, bn in enumerate(bns): + bn: BatchNorm2d + load(bn, 'weight', f"batch_normalization_{i + 1}_gamma.bin", prefix) + load(bn, 'bias', f"batch_normalization_{i + 
1}_beta.bin", prefix) + load(bn, 'running_mean', f"batch_normalization_{i + 1}_mean.bin", prefix) + load(bn, 'running_var', f"batch_normalization_{i + 1}_variance.bin", prefix) + for i, linear in enumerate(linears): + load(linear, 'weight', f"dense_{i + 1}_w.bin", prefix, True) + load(linear, 'bias', f"dense_{i + 1}_b.bin", prefix) + if quant_ranges is not None: + self.install_quant_range(linear, quant_ranges[quant_ranges_idx]) + quant_ranges_idx += 1 + assert quant_ranges is None or len(quant_ranges) == quant_ranges_idx + + +class HPVMDNN(HPVMDefaultModule): + def __init__(self, convs: Sequential, linears: Sequential): + super().__init__() + self.convs = convs + self.linears = linears + self.softmax = Softmax(1) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + outputs = self.convs(inputs) + return self.softmax(self.linears(outputs.view(outputs.shape[0], -1))) diff --git a/llvm/projects/pred_tuner/models/hpvm/lenet.py b/llvm/projects/pred_tuner/models/hpvm/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..0802b5f78d2c73d352afe68b16df74689e9aec68 --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/lenet.py @@ -0,0 +1,16 @@ +from torch.nn import Linear, Sequential, Tanh + +from .layers import HPVMConvBundle, HPVMDNN + + +class LeNet(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(1, 32, 5, Tanh, 2, padding=2), + HPVMConvBundle(32, 64, 5, Tanh, 2, padding=2) + ) + linears = Sequential( + Linear(7 * 7 * 64, 1024), Tanh(), + Linear(1024, 10), Tanh() + ) + super().__init__(convs, linears) diff --git a/llvm/projects/pred_tuner/models/hpvm/mobilenet.py b/llvm/projects/pred_tuner/models/hpvm/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..f48a214fc9c1d7ec52cd5a24ec0e8d82d38aaa6e --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/mobilenet.py @@ -0,0 +1,45 @@ +from torch.nn import AvgPool2d, BatchNorm2d, Conv2d, Linear, ReLU, Sequential + +from .layers import HPVMDNN, HPVMConvBundle + + +def _make_seq(in_channels, out_channels, c_kernel_size, gc_stride, gc_kernel_size=3): + return Sequential( + HPVMConvBundle( + in_channels, out_channels, c_kernel_size, + bias=False, padding=(c_kernel_size - 1) // 2 + ), + BatchNorm2d(out_channels, eps=0.001), + ReLU(), + Conv2d( + out_channels, out_channels, gc_kernel_size, + bias=False, stride=gc_stride, padding=(gc_kernel_size - 1) // 2, groups=out_channels + ), + BatchNorm2d(out_channels, eps=0.001), + ReLU() + ) + + +class MobileNet(HPVMDNN): + def __init__(self): + convs = Sequential( + _make_seq(3, 32, 3, 1), + _make_seq(32, 64, 1, 2), + _make_seq(64, 128, 1, 1), + _make_seq(128, 128, 1, 2), + _make_seq(128, 256, 1, 1), + _make_seq(256, 256, 1, 2), + _make_seq(256, 512, 1, 1), + _make_seq(512, 512, 1, 1), + _make_seq(512, 512, 1, 1), + _make_seq(512, 512, 1, 1), + _make_seq(512, 512, 1, 1), + _make_seq(512, 512, 1, 2), + _make_seq(512, 1024, 1, 1), + HPVMConvBundle(1024, 1024, 1, padding=0, bias=False), + BatchNorm2d(1024, eps=0.001), + ReLU(), + AvgPool2d(2) + ) + linears = Sequential(Linear(1024, 10)) + super().__init__(convs, linears) diff --git a/llvm/projects/pred_tuner/models/hpvm/resnet.py b/llvm/projects/pred_tuner/models/hpvm/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..fc42a00001792b59b593b668f6cf4e8a5a230d9d --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/resnet.py @@ -0,0 +1,96 @@ +from torch.nn import AvgPool2d, BatchNorm2d, Linear, Module, ReLU, Sequential + +from .layers import HPVMConvBundle, 
HPVMDNN + + +class BasicBlock(Module): + def __init__(self, ins, outs, shortcut=False): + super().__init__() + stride = 2 if shortcut else 1 + self.mainline = Sequential( + HPVMConvBundle(ins, outs, 3, ReLU, padding=1, stride=stride), + HPVMConvBundle(outs, outs, 3, padding=1) + ) + self.relu1 = ReLU() + self.shortcut = HPVMConvBundle(ins, outs, 1, stride=stride) \ + if shortcut else Sequential() + + def forward(self, input_): + return self.relu1(self.mainline(input_) + self.shortcut(input_)) + + +class ResNet18(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(3, 16, 3, ReLU, padding=1), + BasicBlock(16, 16), + BasicBlock(16, 16), + BasicBlock(16, 16), + BasicBlock(16, 32, True), + BasicBlock(32, 32), + BasicBlock(32, 32), + BasicBlock(32, 64, True), + BasicBlock(64, 64), + BasicBlock(64, 64), + AvgPool2d(8) + ) + linears = Sequential(Linear(64, 10)) + super().__init__(convs, linears) + + +class Bottleneck(Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.mainline = Sequential( + HPVMConvBundle(in_planes, planes, 1, stride=stride), + BatchNorm2d(planes, eps=0.001), + ReLU(), + HPVMConvBundle(planes, planes, 3, padding=1), + BatchNorm2d(planes, eps=0.001), + ReLU(), + HPVMConvBundle(planes, self.expansion * planes, 1), + BatchNorm2d(self.expansion * planes, eps=0.001) + ) + self.relu1 = ReLU() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = Sequential( + HPVMConvBundle(in_planes, self.expansion * planes, 1, stride=stride), + BatchNorm2d(self.expansion * planes, eps=0.001) + ) + else: + self.shortcut = Sequential() + + def forward(self, input_): + return self.relu1(self.mainline(input_) + self.shortcut(input_)) + + +class ResNet50(HPVMDNN): + def __init__(self): + convs = Sequential( + HPVMConvBundle(3, 64, 7, ReLU, pool_size=3, pool_stride=2, padding=3, stride=2), + BatchNorm2d(64, eps=0.001), + Bottleneck(64, 64), + Bottleneck(256, 64), + Bottleneck(256, 64), + + Bottleneck(256, 128, stride=2), + Bottleneck(512, 128), + Bottleneck(512, 128), + Bottleneck(512, 128), + + Bottleneck(512, 256, stride=2), + Bottleneck(1024, 256), + Bottleneck(1024, 256), + Bottleneck(1024, 256), + Bottleneck(1024, 256), + Bottleneck(1024, 256), + + Bottleneck(1024, 512, stride=2), + Bottleneck(2048, 512), + Bottleneck(2048, 512), + AvgPool2d(7) + ) + linears = Sequential(Linear(2048, 1000)) + super().__init__(convs, linears) diff --git a/llvm/projects/pred_tuner/models/hpvm/vgg16.py b/llvm/projects/pred_tuner/models/hpvm/vgg16.py new file mode 100644 index 0000000000000000000000000000000000000000..b31c0d47ca43118cbc1f7ad43b517d6dc02dd223 --- /dev/null +++ b/llvm/projects/pred_tuner/models/hpvm/vgg16.py @@ -0,0 +1,44 @@ +from typing import Iterable + +from torch.nn import Linear, ReLU, Sequential + +from .layers import HPVMConvBundle, HPVMDNN + + +class _VGG16(HPVMDNN): + def __init__(self, linear_inouts: Iterable[int]): + convs = Sequential( + HPVMConvBundle(3, 64, 3, ReLU, padding=1), + HPVMConvBundle(64, 64, 3, ReLU, 2, padding=1), + HPVMConvBundle(64, 128, 3, ReLU, padding=1), + HPVMConvBundle(128, 128, 3, ReLU, 2, padding=1), + HPVMConvBundle(128, 256, 3, ReLU, padding=1), + HPVMConvBundle(256, 256, 3, ReLU, padding=1), + HPVMConvBundle(256, 256, 3, ReLU, 2, padding=1), + HPVMConvBundle(256, 512, 3, ReLU, padding=1), + HPVMConvBundle(512, 512, 3, ReLU, padding=1), + HPVMConvBundle(512, 512, 3, ReLU, 2, padding=1), + HPVMConvBundle(512, 512, 3, ReLU, padding=1), + HPVMConvBundle(512, 512, 
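# _VGG16 lays out the 13 VGG-16 convolution bundles; the positional `2` after
# ReLU is the pool_size that closes each of the five conv blocks. The
# subclasses below differ only in linear_inouts, from which the classifier is
# assembled as Linear(in, out) layers over consecutive pairs with ReLU in
# between, e.g. [512, 512, 10] becomes Linear(512, 512), ReLU(), Linear(512, 10).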
3, ReLU, padding=1), + HPVMConvBundle(512, 512, 3, ReLU, 2, padding=1) + ) + linear_layers = [Linear(in_, out) for in_, out in zip(linear_inouts, linear_inouts[1:])] + linear_relus = [ReLU() for _ in range(2 * len(linear_layers) - 1)] + linear_relus[::2] = linear_layers + linears = Sequential(*linear_relus) + super().__init__(convs, linears) + + +class VGG16Cifar10(_VGG16): + def __init__(self): + super().__init__([512, 512, 10]) + + +class VGG16Cifar100(_VGG16): + def __init__(self): + super().__init__([512, 512, 100]) + + +class VGG16ImageNet(_VGG16): + def __init__(self): + super().__init__([25088, 4096, 4096, 1000]) diff --git a/llvm/projects/pred_tuner/models/inference.py b/llvm/projects/pred_tuner/models/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..d797e9e605d8c3363d20f09fb52eb4a78195a9ac --- /dev/null +++ b/llvm/projects/pred_tuner/models/inference.py @@ -0,0 +1,99 @@ +import logging +from typing import Type, Union + +import torch +from torch.nn import Module +from torch.utils.data import DataLoader, IterableDataset, Subset + +from .domains import QoS +from .hpvm import HPVMDNN, HPVMDefaultModule +from .networks import networks + +msg_logger = logging.getLogger(__name__) + + +def move_to_device_recursively(data: object, device_: Union[torch.device, str]): + if isinstance(data, torch.Tensor): + return data.to(device_) + if not hasattr(data, '__dict__'): + if isinstance(data, list): + return [move_to_device_recursively(x, device_) for x in data] + elif isinstance(data, tuple): + return tuple([move_to_device_recursively(x, device_) for x in data]) + else: + raise RuntimeError(f"Don't know how to manipulate {type(data)}") + for key, value in data.__dict__.items(): + data.__dict__[key] = move_to_device_recursively(value, device_) + return data + + +def _infer_net_device(net: Module): + return next(iter(net.parameters())).device + + +def get_all_output(net: Module, dataloader: DataLoader): + outputs = [] + device = _infer_net_device(net) + with torch.no_grad(): + for inputs, targets in dataloader: + inputs = move_to_device_recursively(inputs, device) + outputs.append(net(inputs)) + return outputs + + +def load_torch_checkpoint(net: Module, chpt_path: str): + msg_logger.info('==> Loading checkpoint..') + checkpoint = torch.load(chpt_path) + net.load_state_dict(checkpoint.pop('net')) + return checkpoint + + +class BaselineInfo: + def __init__( + self, net: Module, val_loader: DataLoader, test_loader: DataLoader, + non_tensor_output: bool, qos_class: Type[QoS] + ): + self.baseline_net = net + self.val_loader = val_loader + self.test_loader = test_loader + self.non_tensor_output = non_tensor_output + self.qos_class = qos_class + self.val_qos = self.get_qos(net, val_loader) + self.test_qos = self.get_qos(net, test_loader) + + def get_qos(self, net: Module, dataloader: DataLoader): + return self.qos_class.from_all_output(get_all_output(net, dataloader), dataloader) + + @staticmethod + def _split_dataset(dataset: IterableDataset, split_at: int): + return Subset(dataset, torch.arange(0, split_at)), \ + Subset(dataset, torch.arange(split_at, len(dataset))) + + @classmethod + def init_by_name(cls, model_name: str, device) -> 'BaselineInfo': + msg_logger.info('==> Building model..') + network_factory, dataset_factory, batchsize, prefix, qos_class = networks[model_name] + net = network_factory() + # 1. 
Load network weights + msg_logger.info('==> Loading checkpoint..') + if isinstance(net, HPVMDefaultModule): + net.default_load_hpvm_weights(prefix) + else: + load_torch_checkpoint(net, prefix) + net = net.eval().to(device) + # 2. Load dataset + msg_logger.info('==> Loading dataset...') + if isinstance(net, HPVMDNN): + dataset = dataset_factory(prefix) + non_tensor_output = False + elif isinstance(net, HPVMDefaultModule): # Is image benchmark + dataset = dataset_factory(prefix) + non_tensor_output = True + else: + dataset = dataset_factory('./data') + non_tensor_output = False + # 3. Split dataset + test_set, val_set = cls._split_dataset(dataset, 5000) + test_loader = DataLoader(test_set, batch_size=batchsize) + val_loader = DataLoader(val_set, batch_size=batchsize) + return cls(net, val_loader, test_loader, non_tensor_output, qos_class) diff --git a/llvm/projects/pred_tuner/models/networks.py b/llvm/projects/pred_tuner/models/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..a5611bcb3e681c618cc5f8d8d188e9afc2fb5687 --- /dev/null +++ b/llvm/projects/pred_tuner/models/networks.py @@ -0,0 +1,54 @@ +from . import hpvm +from .datasets import CIFAR, CIFARImage, MNIST, get_cifar10_test_dataset +from .domains import Accuracy +from .domains.qoses import AccuracyPSNR +from .torch import ResNet18, VGG + + +networks = { + 'lenet_hpvm': ( + hpvm.LeNet, MNIST.from_default_file, 5000, + 'model_params/lenet_mnist', Accuracy + ), + 'alexnet_hpvm': ( + hpvm.AlexNet, CIFAR.from_default_file, 2000, + 'model_params/alexnet_cifar10', Accuracy + ), + 'alexnet2_hpvm': ( + hpvm.AlexNet2, CIFAR.from_default_file, 2000, + 'model_params/alexnet2_cifar10', Accuracy + ), + 'vgg16_cifar10_hpvm': ( + hpvm.VGG16Cifar10, CIFAR.from_default_file, 500, + 'model_params/vgg16_cifar10', Accuracy + ), + 'vgg16_cifar100_hpvm': ( + hpvm.VGG16Cifar100, CIFAR.from_default_file, 500, + 'model_params/vgg16_cifar100', Accuracy + ), + 'mobilenet_hpvm': ( + hpvm.MobileNet, CIFAR.from_default_file, 1000, + 'model_params/mobilenet', Accuracy + ), + 'resnet18_hpvm': ( + hpvm.ResNet18, CIFAR.from_default_file, 1000, + 'model_params/resnet18_cifar10', Accuracy + ), + 'alexnet_imagenet_hpvm': ( + hpvm.AlexNetImageNet, CIFAR.from_default_file, 100, + 'model_params/alexnet_imagenet', Accuracy + ), + 'vgg16_imagenet_hpvm': ( + hpvm.VGG16ImageNet, CIFAR.from_default_file, 50, + 'model_params/vgg16_imagenet', Accuracy + ), + 'resnet50_imagenet_hpvm': ( + hpvm.ResNet50, CIFAR.from_default_file, 25, + 'model_params/resnet50_imagenet', Accuracy + ), + 'alexnet2_canny_hpvm': ( + lambda: hpvm.AlexNet2Canny(on_classes=[1, 2, 3, 4, 5]), + CIFARImage.from_default_file, 50, + 'model_params/alexnet2_canny', AccuracyPSNR + ) +} diff --git a/llvm/projects/pred_tuner/models/torch/__init__.py b/llvm/projects/pred_tuner/models/torch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aff98ce114a9f0797ed08e74db1184d727f94f2e --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/__init__.py @@ -0,0 +1,15 @@ +from .vgg import * +from .dpn import * +from .lenet import * +from .senet import * +from .pnasnet import * +from .densenet import * +from .googlenet import * +from .shufflenet import * +from .shufflenetv2 import * +from .resnet import * +from .resnext import * +from .preact_resnet import * +from .mobilenet import * +from .mobilenetv2 import * +from .efficientnet import * diff --git a/llvm/projects/pred_tuner/models/torch/densenet.py b/llvm/projects/pred_tuner/models/torch/densenet.py new file mode 
100644 index 0000000000000000000000000000000000000000..47ebbbe08e40503d6785711acd8bd7dd2cdba768 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/densenet.py @@ -0,0 +1,107 @@ +'''DenseNet in PyTorch.''' +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + def __init__(self, in_planes, growth_rate): + super(Bottleneck, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(4*growth_rate) + self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) + + def forward(self, x): + out = self.conv1(F.relu(self.bn1(x))) + out = self.conv2(F.relu(self.bn2(out))) + out = torch.cat([out,x], 1) + return out + + +class Transition(nn.Module): + def __init__(self, in_planes, out_planes): + super(Transition, self).__init__() + self.bn = nn.BatchNorm2d(in_planes) + self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) + + def forward(self, x): + out = self.conv(F.relu(self.bn(x))) + out = F.avg_pool2d(out, 2) + return out + + +class DenseNet(nn.Module): + def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): + super(DenseNet, self).__init__() + self.growth_rate = growth_rate + + num_planes = 2*growth_rate + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) + + self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) + num_planes += nblocks[0]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans1 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) + num_planes += nblocks[1]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans2 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) + num_planes += nblocks[2]*growth_rate + out_planes = int(math.floor(num_planes*reduction)) + self.trans3 = Transition(num_planes, out_planes) + num_planes = out_planes + + self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) + num_planes += nblocks[3]*growth_rate + + self.bn = nn.BatchNorm2d(num_planes) + self.linear = nn.Linear(num_planes, num_classes) + + def _make_dense_layers(self, block, in_planes, nblock): + layers = [] + for i in range(nblock): + layers.append(block(in_planes, self.growth_rate)) + in_planes += self.growth_rate + return nn.Sequential(*layers) + + def forward(self, x): + out = self.conv1(x) + out = self.trans1(self.dense1(out)) + out = self.trans2(self.dense2(out)) + out = self.trans3(self.dense3(out)) + out = self.dense4(out) + out = F.avg_pool2d(F.relu(self.bn(out)), 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + +def DenseNet121(): + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) + +def DenseNet169(): + return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) + +def DenseNet201(): + return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) + +def DenseNet161(): + return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) + +def densenet_cifar(): + return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) + +def test(): + net = densenet_cifar() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/dpn.py b/llvm/projects/pred_tuner/models/torch/dpn.py new file mode 100644 index 
0000000000000000000000000000000000000000..d334367fcc9876b104a94b7ae333362ea0a64469 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/dpn.py @@ -0,0 +1,98 @@ +'''Dual Path Networks in PyTorch.''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Bottleneck(nn.Module): + def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): + super(Bottleneck, self).__init__() + self.out_planes = out_planes + self.dense_depth = dense_depth + + self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) + self.bn2 = nn.BatchNorm2d(in_planes) + self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) + + self.shortcut = nn.Sequential() + if first_layer: + self.shortcut = nn.Sequential( + nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_planes+dense_depth) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + x = self.shortcut(x) + d = self.out_planes + out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) + out = F.relu(out) + return out + + +class DPN(nn.Module): + def __init__(self, cfg): + super(DPN, self).__init__() + in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] + num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.last_planes = 64 + self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) + self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) + self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) + self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) + self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) + + def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for i,stride in enumerate(strides): + layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) + self.last_planes = out_planes + (i+2) * dense_depth + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def DPN26(): + cfg = { + 'in_planes': (96,192,384,768), + 'out_planes': (256,512,1024,2048), + 'num_blocks': (2,2,2,2), + 'dense_depth': (16,32,24,128) + } + return DPN(cfg) + +def DPN92(): + cfg = { + 'in_planes': (96,192,384,768), + 'out_planes': (256,512,1024,2048), + 'num_blocks': (3,4,20,3), + 'dense_depth': (16,32,24,128) + } + return DPN(cfg) + + +def test(): + net = DPN92() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/efficientnet.py b/llvm/projects/pred_tuner/models/torch/efficientnet.py new file mode 100644 index 
0000000000000000000000000000000000000000..6a10a97468b5a505d5ea4bf1b5b53859dacef233 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/efficientnet.py @@ -0,0 +1,99 @@ +'''EfficientNet in PyTorch. + +Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''expand + depthwise + pointwise + squeeze-excitation''' + + def __init__(self, in_planes, out_planes, expansion, stride): + super(Block, self).__init__() + self.stride = stride + + planes = expansion * in_planes + self.conv1 = nn.Conv2d( + in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, + stride=stride, padding=1, groups=planes, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes) + + self.shortcut = nn.Sequential() + if stride == 1 and in_planes != out_planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=1, + stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_planes), + ) + + # SE layers + self.fc1 = nn.Conv2d(out_planes, out_planes//16, kernel_size=1) + self.fc2 = nn.Conv2d(out_planes//16, out_planes, kernel_size=1) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + shortcut = self.shortcut(x) if self.stride == 1 else out + # Squeeze-Excitation + w = F.avg_pool2d(out, out.size(2)) + w = F.relu(self.fc1(w)) + w = self.fc2(w).sigmoid() + out = out * w + shortcut + return out + + +class EfficientNet(nn.Module): + def __init__(self, cfg, num_classes=10): + super(EfficientNet, self).__init__() + self.cfg = cfg + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_planes=32) + self.linear = nn.Linear(cfg[-1][1], num_classes) + + def _make_layers(self, in_planes): + layers = [] + for expansion, out_planes, num_blocks, stride in self.cfg: + strides = [stride] + [1]*(num_blocks-1) + for stride in strides: + layers.append(Block(in_planes, out_planes, expansion, stride)) + in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layers(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def EfficientNetB0(): + # (expansion, out_planes, num_blocks, stride) + cfg = [(1, 16, 1, 2), + (6, 24, 2, 1), + (6, 40, 2, 2), + (6, 80, 3, 2), + (6, 112, 3, 1), + (6, 192, 4, 2), + (6, 320, 1, 2)] + return EfficientNet(cfg) + + +def test(): + net = EfficientNetB0() + x = torch.randn(2, 3, 32, 32) + y = net(x) + print(y.shape) + + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/googlenet.py b/llvm/projects/pred_tuner/models/torch/googlenet.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed8f6eb236d966f206f457e1637e11fecd44408 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/googlenet.py @@ -0,0 +1,106 @@ +"""GoogLeNet with PyTorch.""" +import torch +import torch.nn as nn + + +class Inception(nn.Module): + def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): + super(Inception, self).__init__() + # 1x1 conv branch + self.b1 = nn.Sequential( + nn.Conv2d(in_planes, n1x1, kernel_size=1), + 
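# Inception evaluates four branches in parallel: 1x1 (b1); 1x1 then 3x3 (b2);
# 1x1 then two stacked 3x3 convs standing in for a 5x5 (b3); and 3x3 max-pool
# then 1x1 (b4). forward() concatenates the four results along the channel
# dimension, so e.g. self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) below
# produces 64 + 128 + 32 + 32 = 256 channels, matching the 256 input channels
# of self.b3.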
nn.BatchNorm2d(n1x1), + nn.ReLU(True), + ) + + # 1x1 conv -> 3x3 conv branch + self.b2 = nn.Sequential( + nn.Conv2d(in_planes, n3x3red, kernel_size=1), + nn.BatchNorm2d(n3x3red), + nn.ReLU(True), + nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), + nn.BatchNorm2d(n3x3), + nn.ReLU(True), + ) + + # 1x1 conv -> 5x5 conv branch + self.b3 = nn.Sequential( + nn.Conv2d(in_planes, n5x5red, kernel_size=1), + nn.BatchNorm2d(n5x5red), + nn.ReLU(True), + nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), + nn.BatchNorm2d(n5x5), + nn.ReLU(True), + nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), + nn.BatchNorm2d(n5x5), + nn.ReLU(True), + ) + + # 3x3 pool -> 1x1 conv branch + self.b4 = nn.Sequential( + nn.MaxPool2d(3, stride=1, padding=1), + nn.Conv2d(in_planes, pool_planes, kernel_size=1), + nn.BatchNorm2d(pool_planes), + nn.ReLU(True), + ) + + def forward(self, x): + y1 = self.b1(x) + y2 = self.b2(x) + y3 = self.b3(x) + y4 = self.b4(x) + return torch.cat([y1, y2, y3, y4], 1) + + +class GoogLeNet(nn.Module): + def __init__(self): + super(GoogLeNet, self).__init__() + self.pre_layers = nn.Sequential( + nn.Conv2d(3, 192, kernel_size=3, padding=1), + nn.BatchNorm2d(192), + nn.ReLU(True), + ) + + self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) + self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) + + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) + + self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) + self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) + self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) + self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) + self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) + + self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) + self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) + + self.avgpool = nn.AvgPool2d(8, stride=1) + self.linear = nn.Linear(1024, 10) + + def forward(self, x): + out = self.pre_layers(x) + out = self.a3(out) + out = self.b3(out) + out = self.maxpool(out) + out = self.a4(out) + out = self.b4(out) + out = self.c4(out) + out = self.d4(out) + out = self.e4(out) + out = self.maxpool(out) + out = self.a5(out) + out = self.b5(out) + out = self.avgpool(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = GoogLeNet() + x = torch.randn(1, 3, 32, 32) + y = net(x) + print(y.size()) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/lenet.py b/llvm/projects/pred_tuner/models/torch/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..d657b7482a75a3058e5795f367dfbb32e948b9d5 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/lenet.py @@ -0,0 +1,23 @@ +'''LeNet in PyTorch.''' +import torch.nn as nn +import torch.nn.functional as F + +class LeNet(nn.Module): + def __init__(self): + super(LeNet, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16*5*5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + out = F.relu(self.conv1(x)) + out = F.max_pool2d(out, 2) + out = F.relu(self.conv2(out)) + out = F.max_pool2d(out, 2) + out = out.view(out.size(0), -1) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out diff --git a/llvm/projects/pred_tuner/models/torch/mobilenet.py b/llvm/projects/pred_tuner/models/torch/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..497ef1e867d2a597b9b444ebc7a6f30cd5219777 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/mobilenet.py @@ -0,0 +1,61 
@@ +'''MobileNet in PyTorch. + +See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" +for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''Depthwise conv + Pointwise conv''' + def __init__(self, in_planes, out_planes, stride=1): + super(Block, self).__init__() + self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + return out + + +class MobileNet(nn.Module): + # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 + cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] + + def __init__(self, num_classes=10): + super(MobileNet, self).__init__() + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_planes=32) + self.linear = nn.Linear(1024, num_classes) + + def _make_layers(self, in_planes): + layers = [] + for x in self.cfg: + out_planes = x if isinstance(x, int) else x[0] + stride = 1 if isinstance(x, int) else x[1] + layers.append(Block(in_planes, out_planes, stride)) + in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layers(out) + out = F.avg_pool2d(out, 2) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = MobileNet() + x = torch.randn(1,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/mobilenetv2.py b/llvm/projects/pred_tuner/models/torch/mobilenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..17e5823ef4426ceceae462782a267f89b1ecbc76 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/mobilenetv2.py @@ -0,0 +1,86 @@ +'''MobileNetV2 in PyTorch. + +See the paper "Inverted Residuals and Linear Bottlenecks: +Mobile Networks for Classification, Detection and Segmentation" for more details. 
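+
+Each Block below is an inverted residual: a 1x1 conv expands the channels by
+`expansion`, a 3x3 depthwise conv (groups equal to the channel count) filters
+them per channel, and a final 1x1 conv projects back down with no ReLU after it
+(the linear bottleneck). The residual connection is only taken when stride == 1.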
+''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''expand + depthwise + pointwise''' + def __init__(self, in_planes, out_planes, expansion, stride): + super(Block, self).__init__() + self.stride = stride + + planes = expansion * in_planes + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes) + + self.shortcut = nn.Sequential() + if stride == 1 and in_planes != out_planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(out_planes), + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out = out + self.shortcut(x) if self.stride==1 else out + return out + + +class MobileNetV2(nn.Module): + # (expansion, out_planes, num_blocks, stride) + cfg = [(1, 16, 1, 1), + (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1)] + + def __init__(self, num_classes=10): + super(MobileNetV2, self).__init__() + # NOTE: change conv1 stride 2 -> 1 for CIFAR10 + self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.layers = self._make_layers(in_planes=32) + self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(1280) + self.linear = nn.Linear(1280, num_classes) + + def _make_layers(self, in_planes): + layers = [] + for expansion, out_planes, num_blocks, stride in self.cfg: + strides = [stride] + [1]*(num_blocks-1) + for stride in strides: + layers.append(Block(in_planes, out_planes, expansion, stride)) + in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layers(out) + out = F.relu(self.bn2(self.conv2(out))) + # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def test(): + net = MobileNetV2() + x = torch.randn(2,3,32,32) + y = net(x) + print(y.size()) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/pnasnet.py b/llvm/projects/pred_tuner/models/torch/pnasnet.py new file mode 100644 index 0000000000000000000000000000000000000000..de8c4d51f2667f84eab86f29be9a00ea7d0ad1c3 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/pnasnet.py @@ -0,0 +1,125 @@ +'''PNASNet in PyTorch. 
+ +Paper: Progressive Neural Architecture Search +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SepConv(nn.Module): + '''Separable Convolution.''' + def __init__(self, in_planes, out_planes, kernel_size, stride): + super(SepConv, self).__init__() + self.conv1 = nn.Conv2d(in_planes, out_planes, + kernel_size, stride, + padding=(kernel_size-1)//2, + bias=False, groups=in_planes) + self.bn1 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + return self.bn1(self.conv1(x)) + + +class CellA(nn.Module): + def __init__(self, in_planes, out_planes, stride=1): + super(CellA, self).__init__() + self.stride = stride + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) + if stride==2: + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + y1 = self.sep_conv1(x) + y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) + if self.stride==2: + y2 = self.bn1(self.conv1(y2)) + return F.relu(y1+y2) + +class CellB(nn.Module): + def __init__(self, in_planes, out_planes, stride=1): + super(CellB, self).__init__() + self.stride = stride + # Left branch + self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) + self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) + # Right branch + self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) + if stride==2: + self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(out_planes) + # Reduce channels + self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(out_planes) + + def forward(self, x): + # Left branch + y1 = self.sep_conv1(x) + y2 = self.sep_conv2(x) + # Right branch + y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) + if self.stride==2: + y3 = self.bn1(self.conv1(y3)) + y4 = self.sep_conv3(x) + # Concat & reduce channels + b1 = F.relu(y1+y2) + b2 = F.relu(y3+y4) + y = torch.cat([b1,b2], 1) + return F.relu(self.bn2(self.conv2(y))) + +class PNASNet(nn.Module): + def __init__(self, cell_type, num_cells, num_planes): + super(PNASNet, self).__init__() + self.in_planes = num_planes + self.cell_type = cell_type + + self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(num_planes) + + self.layer1 = self._make_layer(num_planes, num_cells=6) + self.layer2 = self._downsample(num_planes*2) + self.layer3 = self._make_layer(num_planes*2, num_cells=6) + self.layer4 = self._downsample(num_planes*4) + self.layer5 = self._make_layer(num_planes*4, num_cells=6) + + self.linear = nn.Linear(num_planes*4, 10) + + def _make_layer(self, planes, num_cells): + layers = [] + for _ in range(num_cells): + layers.append(self.cell_type(self.in_planes, planes, stride=1)) + self.in_planes = planes + return nn.Sequential(*layers) + + def _downsample(self, planes): + layer = self.cell_type(self.in_planes, planes, stride=2) + self.in_planes = planes + return layer + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.layer5(out) + out = F.avg_pool2d(out, 8) + out = self.linear(out.view(out.size(0), -1)) + return out + + +def PNASNetA(): + return PNASNet(CellA, num_cells=6, num_planes=44) + +def PNASNetB(): + 
return PNASNet(CellB, num_cells=6, num_planes=32) + + +def test(): + net = PNASNetB() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/preact_resnet.py b/llvm/projects/pred_tuner/models/torch/preact_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..abb1bc313c011d2ee650c353c515e2cd404503f3 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/preact_resnet.py @@ -0,0 +1,118 @@ +'''Pre-activation ResNet in PyTorch. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Identity Mappings in Deep Residual Networks. arXiv:1603.05027 +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PreActBlock(nn.Module): + '''Pre-activation version of the BasicBlock.''' + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(PreActBlock, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out += shortcut + return out + + +class PreActBottleneck(nn.Module): + '''Pre-activation version of the original Bottleneck module.''' + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(PreActBottleneck, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) + + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) + ) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + out = self.conv3(F.relu(self.bn3(out))) + out += shortcut + return out + + +class PreActResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(PreActResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512*block.expansion, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = self.conv1(x) + out = self.layer1(out) + out = self.layer2(out) + out 
= self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def PreActResNet18(): + return PreActResNet(PreActBlock, [2,2,2,2]) + +def PreActResNet34(): + return PreActResNet(PreActBlock, [3,4,6,3]) + +def PreActResNet50(): + return PreActResNet(PreActBottleneck, [3,4,6,3]) + +def PreActResNet101(): + return PreActResNet(PreActBottleneck, [3,4,23,3]) + +def PreActResNet152(): + return PreActResNet(PreActBottleneck, [3,8,36,3]) + + +def test(): + net = PreActResNet18() + y = net((torch.randn(1,3,32,32))) + print(y.size()) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/resnet.py b/llvm/projects/pred_tuner/models/torch/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c03ed134293e2a6a1dd373556e83978ef3d560 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/resnet.py @@ -0,0 +1,122 @@ +"""ResNet in PyTorch. + +For Pre-activation ResNet, see 'preact_resnet.py'. + +Reference: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 +""" +import torch.nn as nn +import torch.nn.functional as F + +from models.hpvm import HPVMConvBundle + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = HPVMConvBundle(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.relu1 = nn.ReLU() + self.conv2 = HPVMConvBundle(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + HPVMConvBundle(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion * planes) + ) + self.relu2 = nn.ReLU() + + def forward(self, x): + out = self.relu1(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out += self.shortcut(x) + out = self.relu2(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = HPVMConvBundle(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = HPVMConvBundle(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = HPVMConvBundle(planes, self.expansion * planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + HPVMConvBundle(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion * planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.conv1 = HPVMConvBundle(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU() + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, 
num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.avg_pool2d = nn.AvgPool2d(4) + self.linear = nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1] * (num_blocks - 1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + out = self.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.avg_pool2d(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def ResNet18(): + return ResNet(BasicBlock, [2, 2, 2, 2]) + + +def ResNet34(): + return ResNet(BasicBlock, [3, 4, 6, 3]) + + +def ResNet50(): + return ResNet(Bottleneck, [3, 4, 6, 3]) + + +def ResNet101(): + return ResNet(Bottleneck, [3, 4, 23, 3]) + + +def ResNet152(): + return ResNet(Bottleneck, [3, 8, 36, 3]) diff --git a/llvm/projects/pred_tuner/models/torch/resnext.py b/llvm/projects/pred_tuner/models/torch/resnext.py new file mode 100644 index 0000000000000000000000000000000000000000..7a08f3e7d9fdf3b65aad5b773d4d113c6b796423 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/resnext.py @@ -0,0 +1,95 @@ +'''ResNeXt in PyTorch. + +See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Block(nn.Module): + '''Grouped convolution block.''' + expansion = 2 + + def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): + super(Block, self).__init__() + group_width = cardinality * bottleneck_width + self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(group_width) + self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) + self.bn2 = nn.BatchNorm2d(group_width) + self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion*group_width) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*group_width: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*group_width) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNeXt(nn.Module): + def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): + super(ResNeXt, self).__init__() + self.cardinality = cardinality + self.bottleneck_width = bottleneck_width + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(num_blocks[0], 1) + self.layer2 = self._make_layer(num_blocks[1], 2) + self.layer3 = self._make_layer(num_blocks[2], 2) + # self.layer4 = self._make_layer(num_blocks[3], 2) + self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) + + def _make_layer(self, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + 
layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) + self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width + # Increase bottleneck_width by 2 after each stage. + self.bottleneck_width *= 2 + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + # out = self.layer4(out) + out = F.avg_pool2d(out, 8) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def ResNeXt29_2x64d(): + return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64) + +def ResNeXt29_4x64d(): + return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64) + +def ResNeXt29_8x64d(): + return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64) + +def ResNeXt29_32x4d(): + return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4) + +def test_resnext(): + net = ResNeXt29_2x64d() + x = torch.randn(1,3,32,32) + y = net(x) + print(y.size()) + +# test_resnext() diff --git a/llvm/projects/pred_tuner/models/torch/senet.py b/llvm/projects/pred_tuner/models/torch/senet.py new file mode 100644 index 0000000000000000000000000000000000000000..98bfa0ca51dcd07b586432c9f9460be8d1f0b745 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/senet.py @@ -0,0 +1,121 @@ +'''SENet in PyTorch. + +SENet is the winner of ImageNet-2017. The paper is not released yet. +''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes) + ) + + # SE layers + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + + # Squeeze + w = F.avg_pool2d(out, out.size(2)) + w = F.relu(self.fc1(w)) + w = F.sigmoid(self.fc2(w)) + # Excitation + out = out * w # New broadcasting feature from v0.2! 
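+ # The squeeze path leaves w with shape [N, C, 1, 1] (global average pool plus
+ # two 1x1 convs), so the multiply above broadcasts one learned scale per
+ # channel across the whole H x W feature map.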
+ + out += self.shortcut(x) + out = F.relu(out) + return out + + +class PreActBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1): + super(PreActBlock, self).__init__() + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + + if stride != 1 or in_planes != planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False) + ) + + # SE layers + self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) + self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) + + def forward(self, x): + out = F.relu(self.bn1(x)) + shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x + out = self.conv1(out) + out = self.conv2(F.relu(self.bn2(out))) + + # Squeeze + w = F.avg_pool2d(out, out.size(2)) + w = F.relu(self.fc1(w)) + w = F.sigmoid(self.fc2(w)) + # Excitation + out = out * w + + out += shortcut + return out + + +class SENet(nn.Module): + def __init__(self, block, num_blocks, num_classes=10): + super(SENet, self).__init__() + self.in_planes = 64 + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) + self.linear = nn.Linear(512, num_classes) + + def _make_layer(self, block, planes, num_blocks, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride)) + self.in_planes = planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def SENet18(): + return SENet(PreActBlock, [2,2,2,2]) + + +def test(): + net = SENet18() + y = net(torch.randn(1,3,32,32)) + print(y.size()) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/shufflenet.py b/llvm/projects/pred_tuner/models/torch/shufflenet.py new file mode 100644 index 0000000000000000000000000000000000000000..acff6f78266c55bb93f5b12a6306a5647ebb0769 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/shufflenet.py @@ -0,0 +1,109 @@ +'''ShuffleNet in PyTorch. + +See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details. 
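+
+The channel shuffle in ShuffleBlock below views [N, C, H, W] as [N, g, C/g, H, W],
+swaps the group and channel axes, and flattens back, so every output group mixes
+channels from every input group. A minimal sketch of the effect, assuming
+groups=2 and 6 channels:
+
+    x = torch.arange(6).view(1, 6, 1, 1)    # channels 0..5
+    ShuffleBlock(groups=2)(x).flatten()     # tensor([0, 3, 1, 4, 2, 5])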
+''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ShuffleBlock(nn.Module): + def __init__(self, groups): + super(ShuffleBlock, self).__init__() + self.groups = groups + + def forward(self, x): + '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' + N,C,H,W = x.size() + g = self.groups + return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W) + + +class Bottleneck(nn.Module): + def __init__(self, in_planes, out_planes, stride, groups): + super(Bottleneck, self).__init__() + self.stride = stride + + mid_planes = out_planes/4 + g = 1 if in_planes==24 else groups + self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False) + self.bn1 = nn.BatchNorm2d(mid_planes) + self.shuffle1 = ShuffleBlock(groups=g) + self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False) + self.bn2 = nn.BatchNorm2d(mid_planes) + self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False) + self.bn3 = nn.BatchNorm2d(out_planes) + + self.shortcut = nn.Sequential() + if stride == 2: + self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.shuffle1(out) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + res = self.shortcut(x) + out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res) + return out + + +class ShuffleNet(nn.Module): + def __init__(self, cfg): + super(ShuffleNet, self).__init__() + out_planes = cfg['out_planes'] + num_blocks = cfg['num_blocks'] + groups = cfg['groups'] + + self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(24) + self.in_planes = 24 + self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups) + self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups) + self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups) + self.linear = nn.Linear(out_planes[2], 10) + + def _make_layer(self, out_planes, num_blocks, groups): + layers = [] + for i in range(num_blocks): + stride = 2 if i == 0 else 1 + cat_planes = self.in_planes if i == 0 else 0 + layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups)) + self.in_planes = out_planes + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +def ShuffleNetG2(): + cfg = { + 'out_planes': [200,400,800], + 'num_blocks': [4,8,4], + 'groups': 2 + } + return ShuffleNet(cfg) + +def ShuffleNetG3(): + cfg = { + 'out_planes': [240,480,960], + 'num_blocks': [4,8,4], + 'groups': 3 + } + return ShuffleNet(cfg) + + +def test(): + net = ShuffleNetG2() + x = torch.randn(1,3,32,32) + y = net(x) + print(y) + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/shufflenetv2.py b/llvm/projects/pred_tuner/models/torch/shufflenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..eefcda32059f0b8575148098c78ff5d84effd388 --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/shufflenetv2.py @@ -0,0 +1,162 @@ +'''ShuffleNetV2 in PyTorch. + +See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details. 
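+
+BasicBlock below splits its input channels in half, runs one half through a
+1x1 -> depthwise 3x3 -> 1x1 stack, concatenates it with the untouched half, and
+channel-shuffles the result; DownBlock processes the full input on two parallel
+stride-2 branches and concatenates the two mid_channels branches to reach the
+stage's output width.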
+''' +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ShuffleBlock(nn.Module): + def __init__(self, groups=2): + super(ShuffleBlock, self).__init__() + self.groups = groups + + def forward(self, x): + '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' + N, C, H, W = x.size() + g = self.groups + return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W) + + +class SplitBlock(nn.Module): + def __init__(self, ratio): + super(SplitBlock, self).__init__() + self.ratio = ratio + + def forward(self, x): + c = int(x.size(1) * self.ratio) + return x[:, :c, :, :], x[:, c:, :, :] + + +class BasicBlock(nn.Module): + def __init__(self, in_channels, split_ratio=0.5): + super(BasicBlock, self).__init__() + self.split = SplitBlock(split_ratio) + in_channels = int(in_channels * split_ratio) + self.conv1 = nn.Conv2d(in_channels, in_channels, + kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(in_channels) + self.conv2 = nn.Conv2d(in_channels, in_channels, + kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False) + self.bn2 = nn.BatchNorm2d(in_channels) + self.conv3 = nn.Conv2d(in_channels, in_channels, + kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(in_channels) + self.shuffle = ShuffleBlock() + + def forward(self, x): + x1, x2 = self.split(x) + out = F.relu(self.bn1(self.conv1(x2))) + out = self.bn2(self.conv2(out)) + out = F.relu(self.bn3(self.conv3(out))) + out = torch.cat([x1, out], 1) + out = self.shuffle(out) + return out + + +class DownBlock(nn.Module): + def __init__(self, in_channels, out_channels): + super(DownBlock, self).__init__() + mid_channels = out_channels // 2 + # left + self.conv1 = nn.Conv2d(in_channels, in_channels, + kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False) + self.bn1 = nn.BatchNorm2d(in_channels) + self.conv2 = nn.Conv2d(in_channels, mid_channels, + kernel_size=1, bias=False) + self.bn2 = nn.BatchNorm2d(mid_channels) + # right + self.conv3 = nn.Conv2d(in_channels, mid_channels, + kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(mid_channels) + self.conv4 = nn.Conv2d(mid_channels, mid_channels, + kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False) + self.bn4 = nn.BatchNorm2d(mid_channels) + self.conv5 = nn.Conv2d(mid_channels, mid_channels, + kernel_size=1, bias=False) + self.bn5 = nn.BatchNorm2d(mid_channels) + + self.shuffle = ShuffleBlock() + + def forward(self, x): + # left + out1 = self.bn1(self.conv1(x)) + out1 = F.relu(self.bn2(self.conv2(out1))) + # right + out2 = F.relu(self.bn3(self.conv3(x))) + out2 = self.bn4(self.conv4(out2)) + out2 = F.relu(self.bn5(self.conv5(out2))) + # concat + out = torch.cat([out1, out2], 1) + out = self.shuffle(out) + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, net_size): + super(ShuffleNetV2, self).__init__() + out_channels = configs[net_size]['out_channels'] + num_blocks = configs[net_size]['num_blocks'] + + self.conv1 = nn.Conv2d(3, 24, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(24) + self.in_channels = 24 + self.layer1 = self._make_layer(out_channels[0], num_blocks[0]) + self.layer2 = self._make_layer(out_channels[1], num_blocks[1]) + self.layer3 = self._make_layer(out_channels[2], num_blocks[2]) + self.conv2 = nn.Conv2d(out_channels[2], out_channels[3], + kernel_size=1, stride=1, padding=0, bias=False) + self.bn2 = nn.BatchNorm2d(out_channels[3]) + self.linear = nn.Linear(out_channels[3], 10) + + def _make_layer(self, out_channels, 
num_blocks): + layers = [DownBlock(self.in_channels, out_channels)] + for i in range(num_blocks): + layers.append(BasicBlock(out_channels)) + self.in_channels = out_channels + return nn.Sequential(*layers) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + # out = F.max_pool2d(out, 3, stride=2, padding=1) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = F.relu(self.bn2(self.conv2(out))) + out = F.avg_pool2d(out, 4) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +configs = { + 0.5: { + 'out_channels': (48, 96, 192, 1024), + 'num_blocks': (3, 7, 3) + }, + + 1: { + 'out_channels': (116, 232, 464, 1024), + 'num_blocks': (3, 7, 3) + }, + 1.5: { + 'out_channels': (176, 352, 704, 1024), + 'num_blocks': (3, 7, 3) + }, + 2: { + 'out_channels': (224, 488, 976, 2048), + 'num_blocks': (3, 7, 3) + } +} + + +def test(): + net = ShuffleNetV2(net_size=0.5) + x = torch.randn(3, 3, 32, 32) + y = net(x) + print(y.shape) + + +# test() diff --git a/llvm/projects/pred_tuner/models/torch/vgg.py b/llvm/projects/pred_tuner/models/torch/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..2650d2f4859bedcef0de53a60c58c36b706148af --- /dev/null +++ b/llvm/projects/pred_tuner/models/torch/vgg.py @@ -0,0 +1,39 @@ +"""VGG11/13/16/19 in Pytorch.""" +import torch.nn as nn +from models.hpvm import HPVMConvBundle + + +cfg = { + 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} + + +class VGG(nn.Module): + def __init__(self, vgg_name): + super(VGG, self).__init__() + self.features = self._make_layers(cfg[vgg_name]) + self.classifier = nn.Linear(512, 10) + + def forward(self, x): + out = self.features(x) + out = out.view(out.size(0), -1) + out = self.classifier(out) + return out + + @staticmethod + def _make_layers(config): + layers = [] + in_channels = 3 + for x in config: + if x == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + layers += [HPVMConvBundle(in_channels, x, kernel_size=3, padding=1), + nn.BatchNorm2d(x), + nn.ReLU(inplace=True)] + in_channels = x + layers += [nn.AvgPool2d(kernel_size=1, stride=1)] + return nn.Sequential(*layers) diff --git a/llvm/projects/pred_tuner/run_tuner.py b/llvm/projects/pred_tuner/run_tuner.py new file mode 100644 index 0000000000000000000000000000000000000000..5470763ae01b73b51702c413bd18254f4c5b0d2f --- /dev/null +++ b/llvm/projects/pred_tuner/run_tuner.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python +# +# Development-time Tuner with Algorithmic Approximations: +# Approximations: Perforation, Sampling with varying knobs for rate, skip offset +import copy +import logging +import os +import shutil +import time +from pathlib import Path +from typing import List, Tuple + +import numpy as np +import opentuner +from opentuner import ConfigurationManipulator, EnumParameter, MeasurementInterface +from opentuner.measurement.inputmanager import FixedInputManager +from opentuner.search.objective import ThresholdAccuracyMinimizeTime +from opentuner.tuningrunmain import TuningRunMain +from torch.nn import Module +from tqdm import tqdm + +from exp import Benchmark, ConfigMeasurer, ExpState, TuningTime, batch_id, bench_tuner_data, is_dev_time +from models 
import get_all_output, networks, QoS +from toolkit import ConfigT +from toolkit.estimators import WeightedLinearQoSEstimator +from utils import Config, config, reapply_last_config + +msg_logger = logging.getLogger(__name__) +use_proxy = False +n_promise_valid_runs = 30 +confidence_level = 0.95 + + +def init_proxy(ni: ConfigMeasurer, pickle_path: Path): + def acc_crit(inputs_): + return ni.get_qos(inputs_, ni.val_loader) + + def threshold_eval(inputs_): + accs = np.array([acc_crit(x) for x in inputs_]) + return ni.val_qos - accs.mean() < 3.0 + + def run_model(net: Module): + return get_all_output(net, ni.val_loader) + + return WeightedLinearQoSEstimator( + ni.nas, run_model, acc_crit, threshold_eval, confidence_level, storage=pickle_path + ) + + +class Timer: + def __init__(self, timer_state: TuningTime, timer_name: str): + self.timer_state = timer_state + self.name = timer_name + self.start = None + + def __enter__(self): + self.start = time.time() + return self + + def __exit__(self, *args): + end = time.time() + interval = end - self.start + self.timer_state.add_timer(self.name, interval) + + +class TunerDriver: + def __init__(self, bench: Benchmark): + self.bench = bench + msg_logger.info(f"Tuning for model {self.bench.model_name}") + # Initialize folder. + self._init_folder(bench) + # Take a snapshot of current code. + self.take_code_snapshot() + # Initialize network information and qos thresholds + self.net_info = ConfigMeasurer.init_from_bench(self.bench) + qoses = self.net_info.val_qos, self.net_info.test_qos + qos_type = self.net_info.val_qos.__class__ + self.tuner_thres = qos_type.suggested_tuner_thresholds(self.net_info.val_qos) + self.val_thres = qos_type.suggested_val_threshold(self.net_info.val_qos) + self.test_thres = qos_type.suggested_test_threshold(self.net_info.test_qos) + # Tuner states. + self.states = ExpState(bench, qos_type, qoses) + # Current # of iteration. `ProxyTuner` will use this. + self.run_id, self.iter = 0, 0 + # Initialize proxy. 
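+ # When use_proxy is set, get_accuracy() below estimates a configuration's QoS
+ # through this WeightedLinearQoSEstimator (via net_info.proxy_estimate) instead
+ # of measuring the model directly; its state is cached in result_dir/proxy.pkl,
+ # which _init_folder deliberately leaves in place.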
+ if use_proxy: + self.proxy = init_proxy(self.net_info, self.bench.result_dir / 'proxy.pkl') + else: + self.proxy = None + + @staticmethod + def _init_folder(bench: Benchmark): + def remove_file_or_folder(path: Path): + if path.is_dir(): + shutil.rmtree(child) + elif path.is_file(): + path.unlink() # Removes file despite the surprising name + + pickle_path = bench.result_dir / 'proxy.pkl' + # Remove everything in result folder except pickle file + if bench.result_dir.is_dir(): + msg_logger.warning(f"!Cleaning existing result dir = {bench.result_dir}") + for child in bench.result_dir.glob('*'): + if child == pickle_path: + continue + msg_logger.info(f" !Removing {child}") + remove_file_or_folder(child) + # Create result folder if it doesn't exist + if not bench.result_dir.is_dir(): + msg_logger.info(f"Creating output directory = {bench.result_dir}") + os.makedirs(bench.result_dir) + + def get_default_args(self): + args = opentuner.default_argparser().parse_args() + args.database = f"opentuner.db/{batch_id}.db" + args.test_limit = self.bench.autotuner_runs + parent = Path(args.database).parent + if not parent.is_dir(): + os.makedirs(parent, exist_ok=True) + return args + + def tuner_exec(self): + # Get default opentuner args + args = self.get_default_args() + # Start tuning for each threshold + for i, thres in enumerate(self.tuner_thres): + with Timer(self.states.timers, f"tuning_{i}"): + msg_logger.info( + f"Tuning goal: qos >= {thres}; keeping configs with qos >= {self.val_thres}" + ) + tuner = ProxyTuner(args, self, thres, self.val_thres) + # TuningRunMain.__init__ initializes its own logger, so we'll reapply our settings. + tuning_main = TuningRunMain(tuner, args) + reapply_last_config() + # Unleash the tuner! + tuning_main.main() + # Remove tuner progress bar + tuner.pbar.close() + self.run_id += 1 + self.iter = 0 + # Postprocess configs + self.process_configs() + + def calibrate_write_configs(self, configs: List[Config], is_test_set: bool): + write_to = self.states.tested_configs if is_test_set else self.states.validated_configs + gold_acc = self.net_info.test_qos if is_test_set else self.net_info.val_qos + for cfg in tqdm(configs, leave=False): + cfg = copy.deepcopy(cfg) + cfg: Config + flags = {k: v for k, v in enumerate(cfg.flags)} + measured_acc, confidence = self.net_info.actual_measure( + flags, cfg.total_runs, is_test_set, threshold=self.val_thres + ) + prev_acc = cfg.avg_qos + cfg.update_acc(measured_acc, confidence, gold_acc) + new_acc = cfg.avg_qos + msg_logger.debug(f"{prev_acc} (mean) -> {new_acc} (mean)") + write_to.append(cfg) + write_to.finalize_dump() + + @staticmethod + def filter_configs( + validation: List[Config], test: List[Config], + vali_threshold: QoS, test_threshold: QoS + ) -> Tuple[List[Config], List[Config]]: + # Filter validation and test set by their respective thresholds + filtered_validation = [ + c for c in validation if c.avg_loss <= vali_threshold + ] + filtered_test = [ + c for c in test if c.avg_loss <= test_threshold + ] + # Test configs also need to be a subset of validation configs. + name_to_filtered = {x.fname: x for x in filtered_test} + intersect_names = set(list(name_to_filtered.keys())).intersection( + set((x.fname for x in filtered_validation)) + ) + filtered_test_ = [name_to_filtered[fname] for fname in intersect_names] + return filtered_validation, filtered_test_ + + def process_configs(self): + # Finalize all configs because tuning is done. 
+ # (this may not do anything now but will in the future) + self.states.all_configs.finalize_dump() + all_configs = self.states.all_configs.configs + # Pre-filter configs by a wide pareto margin + filtered_configs = config.is_pareto_efficient(all_configs, ratio=0.05, n_min=50, n_max=50) + msg_logger.info(f"Prefilter yields {len(filtered_configs)} configs from {len(all_configs)}") + self.states.filtered_configs.finalize_dump(with_configs=filtered_configs) + # Calibrate prefiltered configs (validation step) + with Timer(self.states.timers, "validate"): + self.calibrate_write_configs(filtered_configs, is_test_set=False) + validated_configs = self.states.validated_configs.configs + # Calibrate prefiltered configs on test set (test step) + with Timer(self.states.timers, "test"): + self.calibrate_write_configs(filtered_configs, is_test_set=True) + tested_configs = self.states.tested_configs.configs + # Filter valid and test set configs by thresholds + valid_configs, test_configs = self.filter_configs( + validated_configs, tested_configs, self.val_thres, self.test_thres + ) + self.states.valid_configs.finalize_dump(valid_configs) + self.states.test_configs.finalize_dump(test_configs) + # Finalize data input and plot everything. + self.states.finalize_plot() + + def take_code_snapshot(self): + import git + msg_logger.info(f"Taking git snapshot") + ref_dir = self.bench.result_dir / "references" + os.mkdir(ref_dir) + # Write current git commit (SHA id) + repo = git.Repo(search_parent_directories=True) + sha = repo.head.object.hexsha + msg_logger.info(f"Current code is at commit {sha}") + with (ref_dir / 'git_commit.txt').open('w') as f: + f.write(sha) + # Also put all outstanding code change in a diff file. + # This way changes in all git-tracked files are captured. 
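+ # repo.git.diff(t) runs `git diff <HEAD tree>`, i.e. it records both staged
+ # and unstaged modifications relative to the commit written above.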
+ t = repo.head.commit.tree + with (ref_dir / 'diff.txt').open('w') as f: + f.write(repo.git.diff(t)) + + def make_config_name(self) -> str: + return f"{self.bench.model_name}_{self.run_id}_{self.iter}" + + def get_accuracy(self, cfg: ConfigT) -> Tuple[QoS, QoS, int]: + has_promise_flags = set(cfg.values()).intersection(set(range(1, 7 + 1))) + config_validation_runs = n_promise_valid_runs if has_promise_flags else 1 + if use_proxy: + mean_acc, confidence_acc = self.net_info.proxy_estimate(cfg, self.proxy) + assert has_promise_flags or (mean_acc == confidence_acc) + else: + mean_acc, _ = self.net_info.actual_measure(cfg, 1, is_test_set=False) + confidence_acc = mean_acc + return mean_acc, confidence_acc, config_validation_runs + + +class ProxyTuner(MeasurementInterface): + def __init__(self, args, driver: TunerDriver, tuner_thres: QoS, accept_thres: QoS): + self.tuner_driver = driver + self.model_info = driver.net_info + self.bench = driver.bench + self.tuner_thres = tuner_thres + self.all_configs = driver.states.all_configs + self.pbar = tqdm(total=args.test_limit, leave=False) + objective = ThresholdAccuracyMinimizeTime(tuner_thres.to_scalar()) + input_manager = FixedInputManager(size=driver.bench.get_n_layers()) + super(ProxyTuner, self).__init__( + args, program_name=self.bench.model_name, + input_manager=input_manager, objective=objective + ) + self.accept_thres = accept_thres + + def manipulator(self) -> ConfigurationManipulator: + """Define the search space by creating a ConfigurationManipulator.""" + manipulator = ConfigurationManipulator() + for ext_layer_id, knobs in self.model_info.get_knobs().items(): + manipulator.add_parameter(EnumParameter(ext_layer_id, knobs)) + return manipulator + + def seed_configurations(self): + """Provide baseline config as seed if model uses seed.""" + return [self.bench.get_baseline_config(not is_dev_time)] if self.bench.use_seed else [] + + def run(self, desired_result, input_, limit): + """Run a given configuration then return performance and accuracy.""" + cfg: ConfigT = desired_result.configuration.data + # get_accuracy gives estimation of mean accuracy and 95% confident accuracy + mean_acc, confident_acc, n_runs = self.tuner_driver.get_accuracy(cfg) + # getConfigCost returns the cost associated with the selected configuration + total_comps, speedup = self.bench.compute_config_cost(cfg) + Result = opentuner.resultsdb.models.Result() + Result.time = total_comps + # Convert QoS to scalar, because opentuner does not support custom comparable datatype + Result.accuracy = confident_acc.to_scalar(relative_to=self.tuner_thres) + + # If accuracy is acceptable, write this config + if confident_acc > self.accept_thres: + config_name = self.tuner_driver.make_config_name() + cfg_values = [cfg[layer] for layer in sorted(cfg.keys())] + writing_config = Config( + mean_acc, self.model_info.val_qos, config_name, cfg_values, + n_runs, 95.0, total_comps, speedup + ) + self.all_configs.append(writing_config) + msg_logger.debug( + f"Config chosen with accuracy (mean) = {mean_acc}, (95%) = {confident_acc} " + f"and speedup = {speedup}" + ) + self.tuner_driver.iter += 1 + self.pbar.update() + return Result + + def save_final_config(self, configuration): + """Print final configuration.""" + msg_logger.info(f"Final configuration {configuration.data}") + msg_logger.info("Done with Autotuning run") + + +if __name__ == '__main__': + assert set(networks.keys()).issubset(set(bench_tuner_data.keys())) + for network in ('alexnet2_hpvm',): + bench_: Benchmark = 
bench_tuner_data[network] + TunerDriver(bench_).tuner_exec() diff --git a/llvm/projects/pred_tuner/tests/data/1_1_output.json b/llvm/projects/pred_tuner/tests/data/1_1_output.json new file mode 100644 index 0000000000000000000000000000000000000000..3892ae9622a1af68e92b11408372e3d88278ed6a --- /dev/null +++ b/llvm/projects/pred_tuner/tests/data/1_1_output.json @@ -0,0 +1,98 @@ +{ + "('0', '0', '1', '1', '2', '0')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", + "ConvApprox": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", + "ConvApproxHalf2": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000," + }, + "('0', '0', '1', '1', '2', '1')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,", + "ConvApprox": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,", + "ConvApproxHalf2": "40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000,40.000000," + }, + "('0', '0', '1', '1', '3', '0')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvApprox": 
"36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvApproxHalf2": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000," + }, + "('0', '0', '1', '1', '3', '1')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvApprox": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvApproxHalf2": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000," + }, + "('0', '0', '1', '1', '4', '0')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", + "ConvApprox": "32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,32.000000,", + "ConvApproxHalf2": "31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375,31.984375," + }, + "('0', '0', '1', '1', '4', '1')": { + "tensorConvolution": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "FP16_Baseline": "36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,36.000000,", + "ConvSampSim": "37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,37.333332,", + "ConvApprox": 
"37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,37.333336,", + "ConvApproxHalf2": "37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500,37.312500," + }, + "('1', '1', '1', '1', '2', '0')": { + "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": 
"0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000," + }, + "('1', '1', '1', '1', '2', '1')": { + "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": 
"0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,40.000000,40.000000,0.000000,0.000000,0.000000,0.000000,0.000000," + }, + "('1', '1', '1', '1', '3', '0')": { + "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000," + }, + "('1', '1', '1', '1', '3', '1')": { + "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000," + }, + "('1', '1', '1', '1', '4', '0')": { + "tensorConvolution": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,32.000000,32.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": "0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,31.984375,31.984375,0.000000,0.000000,0.000000,0.000000,0.000000," + }, + "('1', '1', '1', '1', '4', '1')": { + "tensorConvolution": 
"0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "FP16_Baseline": "0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,36.000000,36.000000,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvSampSim": "0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,37.333332,37.333332,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApprox": "0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,37.333336,37.333336,0.000000,0.000000,0.000000,0.000000,0.000000,", + "ConvApproxHalf2": 
"0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,37.312500,37.312500,0.000000,0.000000,0.000000,0.000000,0.000000," + } +} diff --git a/llvm/projects/pred_tuner/tests/data/3_3_output.json b/llvm/projects/pred_tuner/tests/data/3_3_output.json new file mode 100644 index 0000000000000000000000000000000000000000..2ccb23c01c7faff1e1c296f5d5bb667633327687 --- /dev/null +++ b/llvm/projects/pred_tuner/tests/data/3_3_output.json @@ -0,0 +1,146 @@ +{ + "('0', '0', '1', '1', '2', '0')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "26.000000,26.000000,26.000000,26.000000,", + "ConvApprox": "26.000000,26.000000,26.000000,26.000000,", + "ConvApproxHalf2": "26.000000,26.000000,26.000000,26.000000," + }, + "('0', '0', '1', '1', '2', '1')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "56.000000,56.000000,56.000000,56.000000,", + "ConvApprox": "56.000000,56.000000,56.000000,56.000000,", + "ConvApproxHalf2": "56.000000,56.000000,56.000000,56.000000," + }, + "('0', '0', '1', '1', '3', '0')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "39.000000,39.000000,39.000000,39.000000,", + "ConvApprox": "39.000000,39.000000,39.000000,39.000000,", + "ConvApproxHalf2": "39.000000,39.000000,39.000000,39.000000," + }, + "('0', '0', '1', '1', '3', '1')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "42.000000,42.000000,42.000000,42.000000,", + "ConvApprox": "42.000000,42.000000,42.000000,42.000000,", + "ConvApproxHalf2": "42.000000,42.000000,42.000000,42.000000," + }, + "('0', '0', '1', '1', '4', '0')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "36.000000,36.000000,36.000000,36.000000,", + "ConvApprox": "36.000000,36.000000,36.000000,36.000000,", + "ConvApproxHalf2": "35.968750,35.968750,35.968750,35.968750," + }, + "('0', '0', '1', '1', '4', '1')": { + "tensorConvolution": "41.000000,41.000000,41.000000,41.000000,", + "Baseline": "41.000000,41.000000,41.000000,41.000000,", + "FP16_Baseline": "41.000000,41.000000,41.000000,41.000000,", + "ConvSampSim": "45.333336,45.333336,45.333336,45.333336,", + "ConvApprox": "45.333336,45.333336,45.333336,45.333336,", + "ConvApproxHalf2": "45.312500,45.312500,45.312500,45.312500," + }, + "('1', '1', '1', '1', '2', '0')": { + "tensorConvolution": 
"18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,", + "ConvApprox": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000,", + "ConvApproxHalf2": "12.000000,18.000000,18.000000,12.000000,18.000000,26.000000,26.000000,18.000000,18.000000,26.000000,26.000000,18.000000,12.000000,18.000000,18.000000,12.000000," + }, + "('1', '1', '1', '1', '2', '1')": { + "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,", + "ConvApprox": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000,", + "ConvApproxHalf2": "24.000000,36.000000,36.000000,24.000000,36.000000,56.000000,56.000000,36.000000,36.000000,56.000000,56.000000,36.000000,24.000000,36.000000,36.000000,24.000000," + }, + "('1', '1', '1', '1', '3', '0')": { + "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,", + "ConvApprox": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000,", + "ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,25.500000,39.000000,39.000000,25.500000,25.500000,39.000000,39.000000,25.500000,18.000000,27.000000,27.000000,18.000000," + }, + "('1', '1', '1', '1', '3', '1')": { + "tensorConvolution": 
"18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvApprox": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvApproxHalf2": "18.000000,27.000000,27.000000,18.000000,28.500000,42.000000,42.000000,27.000000,28.500000,42.000000,42.000000,27.000000,18.000000,27.000000,27.000000,18.000000," + }, + "('1', '1', '1', '1', '4', '0')": { + "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,", + "ConvApprox": "16.000000,22.666666,22.666666,13.333333,25.333334,36.000000,36.000000,22.666668,25.333334,36.000000,36.000000,22.666668,18.666666,25.333334,25.333334,16.000000,", + "ConvApproxHalf2": "16.000000,22.671875,22.671875,13.328125,25.328125,35.968750,35.968750,22.656250,25.328125,35.968750,35.968750,22.656250,18.671875,25.328125,25.328125,16.000000," + }, + "('1', '1', '1', '1', '4', '1')": { + "tensorConvolution": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,18.000000,27.000000,41.000000,41.000000,27.000000,27.000000,41.000000,41.000000,27.000000,18.000000,27.000000,27.000000,18.000000,", + "ConvSampSim": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,", + "ConvApprox": "18.666668,29.333332,29.333332,20.000000,29.333332,45.333336,45.333336,29.333332,29.333332,45.333336,45.333336,29.333332,20.000000,29.333332,29.333332,18.666668,", + "ConvApproxHalf2": "18.656250,29.343750,29.343750,20.000000,29.328125,45.312500,45.312500,29.343750,29.328125,45.312500,45.312500,29.343750,20.000000,29.328125,29.328125,18.656250," + }, + "('1', '1', '2', '2', '2', '0')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": 
"18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "12.000000,18.000000,18.000000,26.000000,", + "ConvApprox": "12.000000,18.000000,18.000000,26.000000,", + "ConvApproxHalf2": "12.000000,18.000000,18.000000,26.000000," + }, + "('1', '1', '2', '2', '2', '1')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "24.000000,36.000000,36.000000,56.000000,", + "ConvApprox": "24.000000,36.000000,36.000000,56.000000,", + "ConvApproxHalf2": "24.000000,36.000000,36.000000,56.000000," + }, + "('1', '1', '2', '2', '3', '0')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "18.000000,27.000000,25.500000,39.000000,", + "ConvApprox": "18.000000,27.000000,25.500000,39.000000,", + "ConvApproxHalf2": "18.000000,27.000000,25.500000,39.000000," + }, + "('1', '1', '2', '2', '3', '1')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "18.000000,27.000000,28.500000,42.000000,", + "ConvApprox": "18.000000,27.000000,28.500000,42.000000,", + "ConvApproxHalf2": "18.000000,27.000000,28.500000,42.000000," + }, + "('1', '1', '2', '2', '4', '0')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "16.000000,22.666666,25.333334,36.000000,", + "ConvApprox": "16.000000,22.666666,25.333334,36.000000,", + "ConvApproxHalf2": "16.000000,22.671875,25.328125,35.968750," + }, + "('1', '1', '2', '2', '4', '1')": { + "tensorConvolution": "18.000000,27.000000,27.000000,41.000000,", + "Baseline": "18.000000,27.000000,27.000000,41.000000,", + "FP16_Baseline": "18.000000,27.000000,27.000000,41.000000,", + "ConvSampSim": "18.666668,29.333332,29.333332,45.333336,", + "ConvApprox": "18.666668,29.333332,29.333332,45.333336,", + "ConvApproxHalf2": "18.656250,29.343750,29.328125,45.312500," + } +} \ No newline at end of file diff --git a/llvm/projects/pred_tuner/tests/data/promise.json b/llvm/projects/pred_tuner/tests/data/promise.json new file mode 100644 index 0000000000000000000000000000000000000000..331ff8527a17a4ff26965e7252cc49a4c409375a --- /dev/null +++ b/llvm/projects/pred_tuner/tests/data/promise.json @@ -0,0 +1,121 @@ +{ + "1": [ + [ + -0.980938, + -1.976522, + -2.999873, + -4.095768, + -5.115182, + 0.0, + 5.075658, + 3.972848, + 2.912783, + 2.051733, + 1.004169, + 1.002379 + ], + 45.213196 + ], + "2": [ + [ + -1.017428, + -2.01491, + -2.951011, + -4.042611, + -4.954911, + 0.0, + 5.05412, + 3.951638, + 2.94989, + 1.99723, + 1.001167, + 0.98796 + ], + 12.535809 + ], + "3": [ + [ + -1.003108, + -2.006269, + -3.00263, + -3.97216, + -4.969401, + 0.0, + 5.012199, + 4.028375, + 2.950729, + 2.004691, + 1.004823, + 0.991805 + ], + 4.886813 + ], + "4": [ + [ + -1.006497, + -1.975768, + -3.031142, + -4.02248, + -5.061712, + 0.0, + 5.017349, + 3.992676, + 2.998843, + 2.002693, + 0.997514, + 1.00649 + ], + 3.129643 + ], + "5": [ + [ + -1.001629, + -1.976943, + -2.982565, + -3.964559, + -4.99636, + 0.0, + 4.992359, + 3.984341, + 2.990126, + 2.005831, + 1.000539, + 1.003548 + ], + 2.181237 + ], + "6": [ + [ + 
-1.003159, + -1.985892, + -3.005964, + -4.008651, + -4.992874, + 0.0, + 4.996098, + 4.012099, + 3.001986, + 2.001431, + 0.996138, + 0.997394 + ], + 1.362949 + ], + "7": [ + [ + -1.003133, + -1.99733, + -3.00755, + -4.007799, + -5.003314, + 0.0, + 5.000926, + 3.993208, + 2.988745, + 2.00329, + 0.99986, + 0.995669 + ], + 0.6926 + ] +} \ No newline at end of file diff --git a/llvm/projects/pred_tuner/tests/data/quantization.json b/llvm/projects/pred_tuner/tests/data/quantization.json new file mode 100644 index 0000000000000000000000000000000000000000..723eaa2b55bc067689beae34829d27d478a0c727 --- /dev/null +++ b/llvm/projects/pred_tuner/tests/data/quantization.json @@ -0,0 +1,58 @@ +{ + "(-4, 6)": [ + -0.132812, + -4.0, + 0.179688, + -0.40625, + 1.664062, + -2.90625, + 0.6875, + 0.960938, + 6.0, + 6.0, + 2.484375, + 2.992188 + ], + "(-2, 2)": [ + -0.109375, + -2.0, + 0.1875, + -0.40625, + 1.6875, + -2.0, + 0.6875, + 0.984375, + 2.0, + 2.0, + 2.0, + 2.0 + ], + "(-25, 8)": [ + -0.121094, + -25.0, + 0.136719, + -0.507812, + 1.683594, + -2.957031, + 0.652344, + 0.910156, + 6.96875, + 7.097656, + 2.457031, + 2.972656 + ], + "(-10, 10)": [ + -0.15625, + -10.0, + 0.15625, + -0.46875, + 1.640625, + -2.96875, + 0.625, + 0.9375, + 6.953125, + 7.1875, + 2.5, + 2.96875 + ] +} \ No newline at end of file diff --git a/llvm/projects/pred_tuner/tests/promise.py b/llvm/projects/pred_tuner/tests/promise.py new file mode 100644 index 0000000000000000000000000000000000000000..59506d94251bfac4909b2236dc9480eb17b9ed70 --- /dev/null +++ b/llvm/projects/pred_tuner/tests/promise.py @@ -0,0 +1,87 @@ +import json +from pathlib import Path + +import torch + +from toolkit import ModuleIndexer, NetApproxSelector +from toolkit.approxdnn import PromiseSim, quantize_256 +from utils import compute_accuracy, init_by_name, run_concat_output + +eps = 1e-5 +delta = 0.05 # Allow for some variance in promise testing + + +def gt_eps(tensor: torch.Tensor) -> bool: + return torch.any(tensor.abs() > eps).item() + + +def compare_quant(groundtruth: dict): + input_tensor = torch.tensor([-0.1, -25, 0.2, -0.4, 1.7, -2.9, 0.7, 0.99, 7, 7.2, 2.5, 3]) + for k, v in groundtruth.items(): + from ast import literal_eval as make_tuple + gt = torch.tensor(v) + ours = quantize_256(input_tensor, *make_tuple(k)) + if gt_eps(gt - ours): + print( + f"Quantization results differ by more than eps = {eps};\n" + f"parameters = {k}\ngroundtruth = {gt}\nours = {ours}" + ) + raise RuntimeError + + +def compare_promise(groundtruth: dict): + input_tensor = torch.tensor([-1, -2, -3, -4, -5, 0, 5, 4, 3, 2, 1, 1], dtype=torch.float) + N = 1000 + for k, (gt_avg, gt_error) in groundtruth.items(): + gt_avg = torch.tensor(gt_avg) + sum_, our_error = torch.zeros_like(input_tensor, dtype=torch.float), 0 + for _ in range(N): + out = PromiseSim.add_promise_noise(input_tensor, int(k)) + sum_ += out + our_error += torch.sum((out - input_tensor) ** 2).item() + our_avg = sum_ / N + our_error = our_error / N + print(gt_avg, our_avg) + if abs(our_error - gt_error) > delta * max(our_error, gt_error): + print( + f"Promise results differ by more than delta = {delta * 100:.1f}%;\n" + f"swing = {k}, groundtruth error = {gt_error}\nours = {our_error}" + ) + raise RuntimeError + + +def is_in_range(mean1: float, std1: float, mean2: float) -> bool: + return mean1 - 3.0 * std1 < mean2 < mean1 + 3.0 * std1 + + +def compare_accuracy(): + baseline, testloader, _, shapes = init_by_name('lenet_hpvm') + baseline_dag = ModuleIndexer(baseline) + nas = NetApproxSelector(baseline_dag, 
dev_time_only=False) + # {0: 1} -> 98.4808 0.1195 + approx1 = nas.apply_approx_by_config({3: 1}) + acc1 = compute_accuracy(run_concat_output(approx1.module, testloader), testloader) + assert is_in_range(0.984808, 0.001195, acc1) + # {0: 2} -> 99.5933 0.0519 + approx2 = nas.apply_approx_by_config({3: 2}) + acc2 = compute_accuracy(run_concat_output(approx2.module, testloader), testloader) + assert is_in_range(0.995933, 0.000519, acc2) + # {0: 3} -> 99.6723 0.0347 + approx3 = nas.apply_approx_by_config({3: 3}) + acc3 = compute_accuracy(run_concat_output(approx3.module, testloader), testloader) + assert is_in_range(0.996723, 0.000347, acc3) + print("Accuracy test passed.") + + +def main(): + data_folder = Path(__file__).parent / 'data' + with open(data_folder / 'quantization.json') as f: + compare_quant(json.load(f)) + with open(data_folder / 'promise.json') as f: + compare_promise(json.load(f)) + compare_accuracy() + print("Tests passed.") + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/tests/resnet50.py b/llvm/projects/pred_tuner/tests/resnet50.py new file mode 100644 index 0000000000000000000000000000000000000000..71711fbfd099d47ba047471ddde3423b297d0f56 --- /dev/null +++ b/llvm/projects/pred_tuner/tests/resnet50.py @@ -0,0 +1,33 @@ +from toolkit import ModuleIndexer, NetApproxSelector +from utils import compute_accuracy, init_by_name, run_concat_output + + +def float_eq(f1, f2): + return abs(f1 - f2) < 1e-5 + + +def main(): + baseline, testloader, _, shapes = init_by_name('resnet50_imagenet_hpvm') + baseline_dag = ModuleIndexer(baseline) + nas = NetApproxSelector(baseline_dag) + # baseline + baseline_output = run_concat_output(baseline_dag.module, testloader) + baseline_acc = compute_accuracy(baseline_output, testloader) + assert float_eq(baseline_acc, 0.773) + # {13: 242} -> 75.5 + approx1 = nas.apply_approx_by_config({82: 242}) + acc1 = compute_accuracy(run_concat_output(approx1.module, testloader), testloader) + assert float_eq(acc1, 0.755) + # {13: 242, 17: 247} -> 74.6 + approx2 = nas.apply_approx_by_config({82: 242, 108: 247}) + acc2 = compute_accuracy(run_concat_output(approx2.module, testloader), testloader) + assert float_eq(acc2, 0.746) + # {9: 237, 13: 242, 17: 247} -> 74.1 + approx3 = nas.apply_approx_by_config({55: 237, 82: 242, 108: 247}) + acc3 = compute_accuracy(run_concat_output(approx3.module, testloader), testloader) + assert float_eq(acc3, 0.741) + print("Accuracy test passed.") + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/tests/sampling.py b/llvm/projects/pred_tuner/tests/sampling.py new file mode 100644 index 0000000000000000000000000000000000000000..707506ef7b8312fda02ca646bd04d034c3eff6ea --- /dev/null +++ b/llvm/projects/pred_tuner/tests/sampling.py @@ -0,0 +1,90 @@ +import json +from copy import deepcopy +from pathlib import Path +from typing import Tuple + +import torch + +from models.hpvm import HPVMConvBundle +from toolkit import Conv2dSampling, Conv2dSamplingFP16, FP16Approx + +eps = 1e-5, 0.05 + + +def sampling_3_3_consts() -> Tuple[torch.Tensor, torch.Tensor]: + input_tensor = torch.ones(1, 3, 4, 4) + # Filter has value [2, 1, 2, 1, 2, 1...] 
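+    # The alternating 2,1,2,1,... pattern comes from writing 2 into every other
+    # element of the flattened 1x3x3x3 filter below (view(-1)[::2] = 2).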
+ filter_tensor = torch.ones(1, 3, 3, 3) + filter_tensor.view(-1)[::2] = 2 + return input_tensor, filter_tensor + + +def sampling_1_1_consts() -> Tuple[torch.Tensor, torch.Tensor]: + input_tensor = torch.ones(1, 9, 2, 2) * 2 + filter_tensor = torch.ones(4, 9, 1, 1) * 2 + return input_tensor, filter_tensor + + +def parse_tensor_str(string: str) -> torch.Tensor: + # String has an extra ',' at the end, so skipping an empty string after split + entries = [float(s) for s in string.split(',')[:-1]] + return torch.tensor(entries).cuda() + + +def compare_to_groundtruth(groundtruth: dict, const_func): + input_tensor, filter_tensor = const_func() + input_tensor = input_tensor.cuda() + o_ch, i_ch, h, w = filter_tensor.size() + assert h == w + for k, v in groundtruth.items(): + def compare(groundtruth_t: torch.Tensor, ours_t: torch.Tensor, is_fp16: bool): + diff = groundtruth_t - ours_t + eps_ = eps[1] if is_fp16 else eps[0] + is_diff = torch.any(diff.abs() > eps_).item() + if is_diff: + print( + f"Results differ by more than eps = {eps};\n" + f"parameters = {k}\n" + f"groundtruth = {groundtruth_t}\n" + f"ours = {ours_t}" + ) + raise RuntimeError + + from ast import literal_eval as make_tuple + pad_h, pad_w, stride_h, stride_w, skip_every, offset = [int(s) for s in make_tuple(k)] + conv_layer = HPVMConvBundle( + i_ch, o_ch, h, stride=(stride_h, stride_w), padding=(pad_h, pad_w) + ) + conv_layer.weight.data = filter_tensor + conv_layer.bias.data = torch.zeros_like(conv_layer.bias.data) + conv_layer = conv_layer.cuda() + our_baseline = conv_layer(input_tensor).flatten() + fp16 = FP16Approx(deepcopy(conv_layer)) + our_fp16 = fp16(input_tensor).flatten() + sampling = Conv2dSampling(skip_every, offset, 1.0, deepcopy(conv_layer)) + our_sampled = sampling(input_tensor).flatten() + sampling_fp16 = Conv2dSamplingFP16(skip_every, offset, 1.0, deepcopy(conv_layer)) + our_sampled_fp16 = sampling_fp16(input_tensor).float().flatten() + groundtruth_baseline = parse_tensor_str(v['Baseline']) + compare(groundtruth_baseline, our_baseline, False) + groundtruth_sampled1 = parse_tensor_str(v['ConvApprox']) + compare(groundtruth_sampled1, our_sampled, False) + groundtruth_sampled2 = parse_tensor_str(v['ConvSampSim']) + compare(groundtruth_sampled2, our_sampled, False) + groundtruth_baseline_fp16 = parse_tensor_str(v['FP16_Baseline']) + compare(groundtruth_baseline_fp16, our_fp16, True) + groundtruth_sampled_fp16 = parse_tensor_str(v['ConvApproxHalf2']) + compare(groundtruth_sampled_fp16, our_sampled_fp16, True) + + +def main(): + data_folder = Path(__file__).parent / 'data' + with open(data_folder / '1_1_output.json') as f: + compare_to_groundtruth(json.load(f), sampling_1_1_consts) + with open(data_folder / '3_3_output.json') as f: + compare_to_groundtruth(json.load(f), sampling_3_3_consts) + print("Tests passed.") + + +if __name__ == '__main__': + main() diff --git a/llvm/projects/pred_tuner/toolkit/__init__.py b/llvm/projects/pred_tuner/toolkit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..892b8c154269c99b7446c70182886b2ee92fc499 --- /dev/null +++ b/llvm/projects/pred_tuner/toolkit/__init__.py @@ -0,0 +1,4 @@ +from .approxdnn import Approximation, AvailableApproximations, Conv2dSampling, FP16Approx, \ + PerforateConv2dStride, PromiseSim +from .estimators import LinearCombEstimator, LinearEstimator, LinearQoSEstimator, WeightedLinearCombEstimator +from .transform import ConfigT, NetApproxSelector, StateCapturer diff --git a/llvm/projects/pred_tuner/toolkit/approxdnn.py 
b/llvm/projects/pred_tuner/toolkit/approxdnn.py new file mode 100644 index 0000000000000000000000000000000000000000..06abca85d521326749902e0058b8a88e3571a611 --- /dev/null +++ b/llvm/projects/pred_tuner/toolkit/approxdnn.py @@ -0,0 +1,442 @@ +"""All approximation techniques for torch.nn layers.""" +import abc +from typing import Dict, Iterable, List, Optional, Type + +import torch +from torch.nn import Linear, Module + +from models.hpvm import HPVMConvBundle +from utils import get_tensorrt_dir + + +def interpolate_first_dim(tensor: torch.Tensor, interp_indices: Iterable[int]): + def tensor_at(idx_: int): + if idx_ in interp_indices: + raise IndexError + if idx_ < 0 or idx_ >= tensor.size()[0]: + return torch.zeros_like(tensor[0]) + return tensor[idx_] + + for idx in interp_indices: + if idx < 0 or idx >= tensor.size()[0]: + raise IndexError + elif idx == 0: # First row + tensor[idx] = tensor_at(1) + elif idx == tensor.size()[0] - 1: # Last row + tensor[idx] = tensor_at(idx - 1) + else: # Middle rows + tensor[idx] = (tensor_at(idx - 1) + tensor_at(idx + 1)) / 2.0 + return tensor + + +class Approximation(abc.ABC): + @property + @abc.abstractmethod + def deterministic(self) -> bool: + pass + + @property + @abc.abstractmethod + def devtime(self) -> bool: + pass + + @property + @abc.abstractmethod + def fp32(self) -> bool: + pass + + @abc.abstractmethod + def apply(self, module: Module) -> Module: + pass + + @abc.abstractmethod + def is_less_approx(self, other: 'Approximation') -> Optional[bool]: + pass + + def __repr__(self): + return f"{self.__class__}({self.__dict__})" + + +class PerforateConv2dStride(Approximation): + r"""Simulation of strided perforated convolution for `torch.nn.Conv2d`. + + Perforated convolution skips computing some entries in the output and instead interpolates + these values, to reduce the number of float-ops needed to complete a convolution op. + In this implementation, selected rows or columns of the output are discarded and replaced + with linearly interpolated values from the neighboring rows or columns. Each channel is + considered independently. + This implementation gives the same output as actual perforated convolution but without the + performance benefit. + + Parameters + ---------- + direction_is_row : bool + If True, discard and interpolate rows, otherwise columns. + stride : int \in [2, +\infty) + Skip 1 row/column in the convolution kernel per `stride` elements. + offset : int \in [0, stride) + Skipped first row/column is `offset`. + + Attributes + ---------- + interp_axis : int :math:`\in \{2, 3\}` + The axis that will be perforated over. As the input is an NCHW tensor, if + `direction_is_row` then `interp_axis = 2`, otherwise `interp_axis = 3`. + stride : int :math:`\in [2, +\infty)` + Equal to parameter `stride`. + offset : int :math:`\in [0, stride)` + Equal to parameter `offset`. 
+ """ + + def __init__(self, direction_is_row: bool, stride: int, offset: int, use_fp16: bool): + assert stride >= 2 + assert 0 <= offset < stride + self.interp_axis = 2 if direction_is_row else 3 + self.stride = stride + self.offset = offset + self.fp16 = use_fp16 + + @property + def deterministic(self) -> bool: + return True + + @property + def devtime(self) -> bool: + return not self.fp16 + + @property + def fp32(self) -> bool: + return not self.fp16 + + def is_less_approx(self, other: Approximation) -> Optional[bool]: + return None + + class PerforateConv2dStrideModule(Module): + def __init__(self, conv: HPVMConvBundle, approx: 'PerforateConv2dStride'): + super().__init__() + self.conv = conv + self.approx = approx + if self.approx.fp16: + self.conv = self.conv.half() + + def forward(self, x: torch.Tensor): + if self.approx.fp16: + x = x.half() + x = self.conv.input_to_conv(x) + assert x.dim() == 4 + # Put self.approx.interp_axis to first axis temporarily + x = x.transpose(0, self.approx.interp_axis) + interp_indices = torch.tensor(range(self.approx.offset, x.size(0), self.approx.stride)) + x = interpolate_first_dim(x, interp_indices) + # Putting axes back + x = x.transpose(0, self.approx.interp_axis) + x = self.conv.conv_to_output(x) + if self.approx.fp16: + assert x.dtype == torch.float16 + return x.float() + + def apply(self, module: HPVMConvBundle) -> PerforateConv2dStrideModule: + return self.PerforateConv2dStrideModule(module, self) + + +class Conv2dSampling(Approximation): + r"""Simulation of sampled convolution for `torch.nn.Conv2d`. + + Skips some elements of the convolution kernel in a uniform, strided manner, + to reduce the amount of float-ops needed to compute each output entry. + This implementation gives the same output as actual sampled convolution but without the + performance benefit. + + Parameters + ---------- + skip_every: int + Skip 1 element in the convolution kernel per `skip_every` elements. + skip_offset : int :math:`\in [0, +\infty)` + Index of first element to be skipped. + For example, if `skip_every = 3` and `skip_offset = 1`, then indices skipped + will be [1, 4, 7, ...] + interp_rate : float + The weight will be compensated ("interpolated") with a ratio after skipping elements, + which is naturally equal to :math:`1 + (1 / (skip_every - 1)`. + `interp_rate` modifies this rate to :math:`1 + (1 / (skip_every - 1) \times interp_rate`. + use_fp16 : bool + Whether to use fp16 weight/input or not. + """ + + def __init__( + self, skip_every: int, skip_offset: int, interp_rate: float, use_fp16: bool + ): + assert skip_every >= 2 and skip_offset >= 0 + self.skip_every = skip_every + self.skip_offset = skip_offset + self.interp_rate = interp_rate + self.fp16 = use_fp16 + + @property + def deterministic(self) -> bool: + return True + + @property + def devtime(self) -> bool: + return not self.fp16 + + @property + def fp32(self) -> bool: + return not self.fp16 + + def is_less_approx(self, other: Approximation) -> Optional[bool]: + return None + + @staticmethod + def sample_conv_weight( + interp_rate: float, skip_every: int, skip_offset: int, weight: torch.Tensor + ): + r"""Samples (skips & interpolates) convolution kernel according to parameters. + + For a given `weight` tensor of shape `(C1, C2, H, W)`, sample each output channel + (on axis 0) independently. + Flatten each output channel tensor into 1 dim. + In normal cases, set elements at indices ``range(skip_offset, C_2 * H * W, skip_every)`` + to 0. 
+ However, if `skip_every` == `h` == `w` == 3, we may end up skipping the same whole rows for + each input channel, which is undesirable. + Instead, increment the offset by 1 for each input channel. + Last, multiplies the kernel by the inverse ratio of elements dropped for an interpolation. + """ + if len(weight.shape) != 4: + raise ValueError("Conv2d weight should be 4-dimensional") + c1, c2, h, w = weight.shape + if skip_every == h == w == 3: + # Indices (0..h*w) to skip for each input channel + per_chan_skip_indices = [ + range((i_chan + skip_offset) % skip_every, h * w, skip_every) + for i_chan in range(c2) + ] + # Indices (0..c2*h*w) for each output channel, created by adding i*h*w for ith channel. + skip_indices = torch.tensor([ + x + i * h * w for i, per_chan in enumerate(per_chan_skip_indices) + for x in per_chan + ]) + else: + # Indices (0..c2*h*w) to skip for each output channel + skip_indices = torch.arange(skip_offset, c2 * h * w, skip_every) + flat_weight = weight.reshape(c1, -1) + flat_weight[:, skip_indices] = 0 + interp_rate = 1 + (1 / (skip_every - 1) * interp_rate) + flat_weight *= interp_rate + return flat_weight.reshape_as(weight) + + def apply(self, module: HPVMConvBundle) -> HPVMConvBundle: + # Not copying weight tensor leads to memory leak + cloned_conv_w = module.weight.clone().detach() + module.weight.data = self.sample_conv_weight( + self.interp_rate, self.skip_every, self.skip_offset, cloned_conv_w + ) + return module + + +def quantize_256(tensor: torch.Tensor, range_min: float, range_max: float) -> torch.Tensor: + """Quantize a tensor so that only 256 unique float value exists.""" + quantize_range = 256 + input_range = range_max - range_min + mul = input_range / quantize_range + # Map tensor into [0, 256] range. + affined = (tensor - range_min) / mul + # Convert tensor to int and back to float so it will have + # 256 (actually 257!; following hpvm impl) unique float values [0, 256]. + # Then reverse affine it to the original range. + quanted = torch.floor(affined).to(torch.int).to(torch.float) + quanted_float = quanted * mul + range_min + # Clip tensor + return torch.clamp(quanted_float, range_min, range_max) + + +class PromiseSim(Approximation): + scaling_values = [0.75, 0.64, 0.336, 0.21, 0.168, 0.14, 0.11, 0.0784, 0.005] + + def __init__(self, noise_level: int): + super().__init__() + self.noise_level = noise_level + + @property + def deterministic(self) -> bool: + return False + + @property + def devtime(self) -> bool: + return False + + @property + def fp32(self) -> bool: + return False + + def is_less_approx(self, other: Approximation) -> Optional[bool]: + if isinstance(other, PromiseSim): + return self.noise_level > other.noise_level + return None + + def add_promise_noise(self, tensor: torch.Tensor): + scale = self.scaling_values[self.noise_level] + noise = torch.normal( + mean=0.0, std=scale, size=tensor.size(), device=tensor.device + ) + return noise * tensor + tensor + + class PromiseSimModule(Module): + def __init__(self, module: HPVMConvBundle, approx: 'PromiseSim'): + super().__init__() + self.input_r, weight_r, bias_r, self.output_r = module.conv_ranges + module.weight.data = quantize_256(module.weight, *weight_r) + if module.bias is not None: + module.bias.data = quantize_256(module.bias, *bias_r) + self.module = module + self.approx = approx + + def forward(self, input_: torch.Tensor) -> torch.Tensor: + # Quantize input, weight, bias (see __init__), and add noise to input. 
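+            # The weight and bias were already quantized once in __init__ using their
+            # profiled ranges, so each forward pass only quantizes the input, injects
+            # Gaussian noise scaled by the configured swing level, runs the wrapped
+            # convolution, and re-quantizes the result to the profiled output range.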
+ input_ = quantize_256(input_, *self.input_r) + input_ = self.approx.add_promise_noise(input_) + output = self.module(input_) + # Then again, quantize output. + return quantize_256(output, *self.output_r) + + def apply(self, module: HPVMConvBundle) -> PromiseSimModule: + return self.PromiseSimModule(module, self) + + +class FP16Approx(Approximation): + def __init__(self): + super().__init__() + + @property + def deterministic(self) -> bool: + return True + + @property + def devtime(self) -> bool: + return False + + @property + def fp32(self) -> bool: + return False + + def is_less_approx(self, other: Approximation) -> Optional[bool]: + return None + + class FP16ApproxModule(Module): + def __init__(self, module: Module): + super().__init__() + self.module = module.half() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x: torch.Tensor = self.module(x.half()) + assert x.dtype == torch.float16 + return x.float() + + def apply(self, module: Module) -> FP16ApproxModule: + return self.FP16ApproxModule(module) + + +AllApproxesT = Dict[int, Approximation] +TypeApproxesT = Dict[Type[Module], List[int]] + + +class AvailableApproximations: + r"""Holds a list of all available "approximation info": approximation + properties. + + For properties see `Approximation`. + + Parameters + ---------- + all_knobs: Dict[int, Approximation] + A dict from int index to (approximation, is_dev_time) pair. + Also see class function `from_global_knobs_file`. + + Attributes + ---------- + all_knobs : Dict[int, Approximation] + A mapping from approximation index to approximation info pair `(approximation, is_dev_time)`. + type_to_knobs : Dict[Type[Module], List[int]] + A mapping from network layer type (subtype of `torch.nn.Module`) to a list of indexes of + applicable approximations. Values of `type_to_knobs` are always valid keys in `all_knobs`. + """ + + def __init__(self, all_knobs: Dict[int, Approximation], type_to_knobs: TypeApproxesT): + self.all_knobs = all_knobs + self.type_to_knobs = type_to_knobs + + @classmethod + def from_global_knobs_file(cls) -> 'AvailableApproximations': + """Read and parse global_knobs.txt to provide all knobs supported and their indexes. + + Returns two things: + * Dict of indexes to (approximations, is_dev_time). Approximation is in the form of functions + with a layer input; see `ModuleReplacerT`. + * Dict of type of torch.nn.Module to a list of approximation indexes that can be applied to this + type of layer. 
+ """ + with (get_tensorrt_dir() / 'autotuner/data/global_knobs.txt').open() as f: + lines = f.readlines() + all_knobs = {} + promise_and_fp16 = [] + for line in lines: + desc, knobs, _, _, _, _, _ = line.rstrip().split() + category, index = desc.split(',') + index = int(index) + if category in ('perf', 'perf_fp16'): + row, col, offset = [int(s) for s in knobs.split(',')] + if row > 1 and col > 1: + raise ValueError("Perforation on both row and column is not supported") + if col == 1: + direction_is_row, stride = True, row + else: + direction_is_row, stride = False, col + all_knobs[index] = PerforateConv2dStride( + direction_is_row, stride, offset, 'fp16' in category + ) + elif category in ('samp', 'samp_fp16'): + stride, offset, interp_rate = knobs.split(',') + stride, offset, interp_rate = int(stride), int(offset), float(interp_rate) + all_knobs[index] = Conv2dSampling( + stride, offset, interp_rate, 'fp16' in category + ) + elif category == 'swing_level': + all_knobs[index] = PromiseSim(index) + promise_and_fp16.append(index) + elif category == 'fp16': + all_knobs[index] = FP16Approx() + promise_and_fp16.append(index) + type_to_knobs = { + HPVMConvBundle: list(all_knobs.keys()), + Linear: promise_and_fp16 + } + return cls(all_knobs, type_to_knobs) + + def items(self, dev_time: bool, ignore_fp32: bool) -> Dict[Type[Module], List[int]]: + """Give a list of applicable approximations for each layer type. + + If dev_time is True, returns only devtime approximations, otherwise all approximations. + """ + + def remove_non_dev(type_to_knobs: TypeApproxesT) -> TypeApproxesT: + return { + k: [v for v in vs if self.all_knobs[v].devtime] + for k, vs in type_to_knobs.items() + } + + def remove_fp32(type_to_knobs: TypeApproxesT) -> TypeApproxesT: + return { + k: [v for v in vs if not self.all_knobs[v].fp32] + for k, vs in type_to_knobs.items() + } + + type_to_knobs_ = self.type_to_knobs + if dev_time: + type_to_knobs_ = remove_non_dev(type_to_knobs_) + if ignore_fp32: + type_to_knobs_ = remove_fp32(type_to_knobs_) + return type_to_knobs_ + + def __getitem__(self, item: int) -> Approximation: + """Returns the approximation info for given approximation index.""" + return self.all_knobs[item] diff --git a/llvm/projects/pred_tuner/toolkit/estimators.py b/llvm/projects/pred_tuner/toolkit/estimators.py new file mode 100644 index 0000000000000000000000000000000000000000..acd35331693c706df336a6e3a33d1c6098a6cb50 --- /dev/null +++ b/llvm/projects/pred_tuner/toolkit/estimators.py @@ -0,0 +1,383 @@ +import abc +import gc +import logging +import pickle +from math import sqrt +from pathlib import Path +from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, TypeVar + +import numpy as np +import torch +from torch.nn import Module +from tqdm import tqdm, trange + +from models.domains import QoS, qos_stats +from .transform import ConfigT, NetApproxSelector + +ProfT = TypeVar('ProfT') +NetOutputT = TypeVar('NetOutputT') +QoST = Callable[[NetOutputT], QoS] +ThresholdEvalT = Callable[[NetOutputT], bool] +ExeT = Callable[[Module], NetOutputT] +KeyT = Tuple[int, int] +KVT = Tuple[KeyT, NetOutputT] +EstmT = Tuple[QoS, QoS] + +msg_logger = logging.getLogger(__name__) + + +class LinearEstimator(abc.ABC): + """Estimate QoS of a config by linearly adding "something" from each approximation of config, and + then applying QoS metric. + + That "something" could be QoS itself (see `LinearQoSEstimator`), or the direct tensor output from + the model (see `LinearTensorEstimator`). 
+ In initialization phase, run the model for each 1-approximation config and store the quantity to + be linearly summed in a table. + + Parameters + ---------- + nas: NetApproxSelector + `NetApproxSelector` instance is used to select all 1-approximation configs and evaluate them. + qos: Callable[[torch.Tensor], float] + Quality of Service measure (such as accuracy). Takes model output tensor and returns QoS value. + independent_init: bool + If False, don't initialize self.profile_table, and wait for `coinit_estimators` to fill in + the profile. `coinit_estimators` must be manually called if `init_profile` is False. + + Attributes + ---------- + qos : Callable[[torch.Tensor], float] + Same as parameter `qos`. + baseline_profile : T + Profile value of the baseline model. + profile_table : Dict[KeyT, T] + A mapping from (`layer_idx`, `approx_idx`) to the profile value, with only this approximation + applied (in other words, with configuration ``{layer_idx: approx_idx}`` applied). + """ + + n_nondeterm_runs = 10 + + def __init__( + self, nas: NetApproxSelector, executor: ExeT, qos: QoST, + threshold_eval: ThresholdEvalT, confidence_level: float, + independent_init: bool = True, storage: Path = None + ): + self.nas = nas + self.qos = qos + self.executor = executor + self.storage = storage + self.baseline_profile: ProfT = self.get_baseline_profile() + self.profile_table: Dict[KeyT, ProfT] = {} + self.confidence_level = confidence_level + if independent_init: + for (k, i), output in self._get_all_outputs(nas, self.executor, threshold_eval, storage): + self.profile_table[k, i] = self.handle_output(output) + + @staticmethod + def _load_from_pickle(storage: Path) -> Iterator[KVT]: + if not storage.is_file(): + return + msg_logger.info(f"Found pickle at {storage}") + with storage.open('rb') as f: + while True: + try: + key, tensor = pickle.load(f) + yield key, tensor + except EOFError: + return + + @classmethod + def run_model(cls, nas: NetApproxSelector, config: ConfigT, executor: ExeT) -> torch.Tensor: + is_deterministic = nas.is_deterministic(config) + model = nas.apply_approx_by_config(config).module + if is_deterministic: + ret = executor(model).unsqueeze(0).cpu() + else: + assert cls.n_nondeterm_runs > 0 + ret = torch.stack([ + executor(model) + for _ in trange(cls.n_nondeterm_runs, leave=False) + ]).cpu() + gc.collect() + return ret + + @classmethod + def _get_all_outputs( + cls, nas: NetApproxSelector, executor: ExeT, + threshold_eval: ThresholdEvalT, storage: Path = None + ) -> Iterator[KVT]: + preloaded_acceptable = {} + if storage is not None: + bar = tqdm(cls._load_from_pickle(storage)) + for key, tensor in bar: + bar.set_postfix(key=key) + preloaded_acceptable[key] = threshold_eval(tensor) + yield key, tensor + + def evaluate(k: int, i: int) -> Tuple[bool, Optional[KVT]]: + if (k, i) in preloaded_acceptable: + msg_logger.debug(f"Key {(k, i)} is preloaded.") + return preloaded_acceptable[(k, i)], None + outputs = cls.run_model(nas, {k: i}, executor) + if storage is not None: + with storage.open('ab') as f: + pickle.dump(((k, i), outputs), f) + return threshold_eval(outputs), ((k, i), outputs) + + for key_outputs in nas.filter_approxes(evaluate): + # key_outputs is None means corresponding key has been preloaded (we can't see the key) + if key_outputs is None: + continue + yield key_outputs + + @classmethod + def coinit_estimators( + cls, nas: NetApproxSelector, executor: ExeT, threshold_eval: ThresholdEvalT, + *estm_insts: 'LinearEstimator', storage: Path = None + ): + for (k, i), output 
in cls._get_all_outputs(nas, executor, threshold_eval, storage): + for inst in estm_insts: + inst.profile_table[(k, i)] = inst.handle_output(output) + + @abc.abstractmethod + def get_baseline_profile(self) -> ProfT: + pass + + @abc.abstractmethod + def handle_output(self, outputs: torch.Tensor) -> ProfT: + pass + + @abc.abstractmethod + def estimate(self, config: ConfigT) -> EstmT: + pass + + +class LinearQoSEstimator(LinearEstimator): + """Estimate QoS of a config by linearly adding QoS value. See `LinearEstimator`. + + ProfT = Tuple[QoS(mean), QoS(std)] + NetOutputT = torch.Tensor + """ + + def estimate(self, config: ConfigT) -> EstmT: + baseline_mean: QoS = self.baseline_profile[0] + if not config: + return baseline_mean, baseline_mean + # N * 2 array + profiles = np.array([self.profile_table[kv] for kv in config.items()]) + profiles[:, 0] -= baseline_mean + estm_qos = profiles[:, 0].sum() + baseline_mean + estm_std = sqrt(np.sum(profiles[:, 1] ** 2)) + # We're hardcoding 95% confidence interval here. + assert self.confidence_level == 0.95 + normal_dist_95 = 1.644854 + r1, r2 = estm_qos, estm_qos - normal_dist_95 * estm_std + return float(r1), float(r2) + + def handle_output(self, outputs: torch.Tensor) -> Tuple[QoS, QoS]: + qoses = np.array([self.qos(o) for o in outputs]) + msg_logger.debug(f"Handled {qoses.mean(), qoses.std()}") + return qoses.mean(), qoses.std() + + def get_baseline_profile(self) -> Tuple[QoS, QoS]: + mean_qos = self.qos(self.run_model(self.nas, {}, self.executor)[0]) + return mean_qos, mean_qos.null() + + +class LinearCombEstimator(LinearEstimator): + """Estimate QoS of a config by linearly adding tensor output from network. See `LinearEstimator`. + + On estimation, sums over the delta in tensor output (compared to baseline output) for each + approximation, and then the baseline tensor output is added back. + This works as an estimation of tensor output for this configuration, which is then sent to QoS + metric to get the final QoS. 
+ + QoST = float + ProfT = torch.Tensor (2 * n_inputs * n_classes) + NetOutputT = torch.Tensor (n_inputs * n_classes) + """ + + def estimate(self, config) -> EstmT: + if not config: + baseline_qos = self.qos(self.baseline_profile) + return baseline_qos, baseline_qos + # 4D tensor: n_approx * 2 * n_inputs * n_classes + profiles = torch.stack([self.profile_table[kv] for kv in config.items()]) + profiles -= self.baseline_profile + mean_tensor, confidence_tensor = profiles.sum(dim=0) + self.baseline_profile + estm_mean_qos = self.qos(mean_tensor) + estm_confidence_qos = self.qos(confidence_tensor) + return estm_mean_qos, estm_confidence_qos + + def handle_output(self, outputs: torch.Tensor) -> torch.Tensor: + if len(outputs) == 1: + return torch.stack((outputs[0], outputs[0])) + qoses = np.array([self.qos(o) for o in outputs]) + percentile_pos = int(self.n_nondeterm_runs * (1 - self.confidence_level)) + assert 0 <= percentile_pos < self.n_nondeterm_runs + mean_pos = np.searchsorted(qoses, qoses.mean(), 'right') + assert 0 <= mean_pos <= self.n_nondeterm_runs + if mean_pos == self.n_nondeterm_runs: + mean_pos = self.n_nondeterm_runs - 1 + return torch.stack((outputs[mean_pos], outputs[percentile_pos])) + + def get_baseline_profile(self) -> torch.Tensor: + return self.run_model(self.nas, {}, self.executor)[0] + + +class TrainableEstimator(LinearEstimator, abc.ABC): + """ + QoST = float + ProfT = ProfT + NetOutputT = torch.Tensor (n_inputs * n_classes) + """ + n_train_confs = 50 + weight_range = 0.8, 1.2, 20 + n_cold_start = 500 + accept_threshold = 5 + penalize_overestm = 1.0 + + def __init__( + self, nas: NetApproxSelector, executor: ExeT, qos: QoST, + threshold_eval: ThresholdEvalT, confidence_level: float, + independent_init: bool = True, storage: Path = None + ): + super().__init__(nas, executor, qos, threshold_eval, confidence_level, independent_init, storage) + self.r_cands = np.linspace(*self.weight_range) + self.r_error = np.zeros((len(self.r_cands), self.n_train_confs)) + self.r = self.weight_range[1] + self.trained_iters = 0 + self.cold_start = 0 + + def update_r(self): + mean_error = np.mean(self.r_error, axis=1) + best_idx = np.argmin(mean_error) + self.r = self.r_cands[best_idx] + if best_idx == len(mean_error) - 1 or best_idx == 0: + msg_logger.warning(f"Parameter value r = {self.r} has reached the boundary. 
Consider a larger range.") + + def get_qos_for_config(self, config: ConfigT) -> EstmT: + is_deterministic = self.nas.is_deterministic(config) + net = self.nas.apply_approx_by_config(config).module + n_runs = 1 if is_deterministic else self.n_nondeterm_runs + qoses = [self.qos(self.executor(net)) for _ in trange(n_runs, leave=False)] + mean_qos, qos_at_confidence, _ = qos_stats(qoses, confidence=self.confidence_level) + return mean_qos, qos_at_confidence + + @abc.abstractmethod + def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: + pass + + def estimate(self, config) -> EstmT: + estm = self.real_estimate(config)[0] + if self.cold_start < self.n_cold_start: + self.cold_start += 1 + if self.cold_start % 50 == 0: + msg_logger.info(f"WeightedLinearCombEstimator cold start {self.cold_start} / {self.n_cold_start}") + return estm + if self.trained_iters >= self.n_train_confs: + return estm + log_info_freq = 10 + log_level = logging.INFO if self.trained_iters % log_info_freq == 0 else logging.DEBUG + msg_logger.log( + log_level, + f"{self.__class__} train iter {self.trained_iters} / {self.n_train_confs}" + ) + mean_qos, qos_at_confidence = self.get_qos_for_config(config) + estm_conf_qoses = np.array(self.real_estimate(config, rs=self.r_cands))[:, 1] + diff_conf_qoses = qos_at_confidence - estm_conf_qoses + old_r = self.r + self.r_error[:, self.trained_iters] = np.where( + diff_conf_qoses > 0, diff_conf_qoses * self.penalize_overestm, + -diff_conf_qoses + ) + self.trained_iters += 1 + self.update_r() + msg_logger.debug( + f"{self.__class__} real mean qos = {mean_qos}, real conf qos = {qos_at_confidence}, " + f"estm conf qos = {estm[1]}, r: {old_r} -> {self.r}" + ) + return mean_qos, qos_at_confidence + + +class WeightedLinearCombEstimator(TrainableEstimator, LinearCombEstimator): + """ + QoST = float + ProfT = torch.Tensor + NetOutputT = torch.Tensor (n_inputs * n_classes), logged + """ + + def __init__( + self, nas: NetApproxSelector, executor: ExeT, qos: QoST, + threshold_eval: ThresholdEvalT, confidence_level: float, + independent_init: bool = True, storage: Path = None + ): + log_qos = lambda x: qos(torch.exp(x)) + super().__init__(nas, executor, log_qos, threshold_eval, confidence_level, independent_init, storage) + + @staticmethod + def tensor_log(tensor: torch.Tensor) -> torch.Tensor: + # TODO: don't take log if there's no SoftMax layer. 
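+ # Clamp to a small positive epsilon before taking the log, so zero class probabilities do not become -inf.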
+ eps = torch.ones_like(tensor) * 1e-10 + return torch.log(torch.max(tensor, eps)) + + def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: + # 3D tensor: 2 * n_inputs * n_classes + if config: + estm_delta_output = torch.sum( + torch.stack([self.profile_table[kv] for kv in config.items()]) - self.baseline_profile, + dim=0 + ) + else: + n_in, n_out = self.baseline_profile.shape + estm_delta_output = torch.zeros(2, n_in, n_out) + rets = [] + rs = rs if rs is not None else [self.r] + for r in rs: + mean_tensor, confidence_tensor = estm_delta_output * r + self.baseline_profile + rets.append((self.qos(mean_tensor), self.qos(confidence_tensor))) + return rets + + def handle_output(self, outputs: torch.Tensor) -> torch.Tensor: + return LinearCombEstimator.handle_output(self, self.tensor_log(outputs)) + + def get_baseline_profile(self) -> torch.Tensor: + return self.tensor_log(LinearCombEstimator.get_baseline_profile(self)) + + +class WeightedLinearQoSEstimator(TrainableEstimator, LinearQoSEstimator): + """ + QoST = float + ProfT = torch.Tensor + NetOutputT = torch.Tensor (n_inputs * n_classes), logged + """ + + weight_range = 0.5, 5, 50 + + def estimate(self, config) -> EstmT: + ret = super().estimate(config) + msg_logger.debug(f"Config {config} -> estimation {ret}") + return ret + + def real_estimate(self, config, rs: Iterable[float] = None) -> List[EstmT]: + baseline_mean_qos = self.baseline_profile[0] + if config: + # N * 2 array + profiles = np.array([self.profile_table[kv] for kv in config.items()]) + profiles[:, 0] -= baseline_mean_qos + profiles[:, 0][profiles[:, 0] > 0] = 0 + estm_mean_qos_delta = profiles[:, 0].sum() + estm_std = sqrt(np.sum(profiles[:, 1] ** 2)) + else: + estm_mean_qos_delta = estm_std = 0.0 + rets = [] + rs = rs if rs is not None else [self.r] + for r in rs: + estm_mean_qos = float(estm_mean_qos_delta * r + baseline_mean_qos) + # We're hardcoding 95% confidence interval here. 
+ assert self.confidence_level == 0.95 + normal_dist_95 = 1.644854 + estm_conf_qos = estm_mean_qos - normal_dist_95 * estm_std + rets.append((estm_mean_qos, estm_conf_qos)) + return rets diff --git a/llvm/projects/pred_tuner/toolkit/indexing.py b/llvm/projects/pred_tuner/toolkit/indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..27500c152ac5130f6df787f16f53e84c3099bcf6 --- /dev/null +++ b/llvm/projects/pred_tuner/toolkit/indexing.py @@ -0,0 +1,55 @@ +from typing import Callable, Iterator, Optional, Set + +import torch +from torch.nn import Module, Sequential + +UnaryForwardT = Callable[[torch.Tensor], torch.Tensor] +ReplacedForwardT = Callable[[Module, UnaryForwardT, torch.Tensor], torch.Tensor] + + +class ModuleIndexer: + def __init__(self, module: Module, ignore_module: Callable[[Module], bool]): + self.module_to_index = {} + for i, submodule in enumerate(module.modules()): + if ignore_module(submodule): + continue + self.module_to_index[submodule] = i + self.index_to_module = {i: m for m, i in self.module_to_index.items()} + self.module = module + self.layer_parents = self.find_layers_parent_info(module, set(self.all_modules)) + + @staticmethod + def find_layers_parent_info(net: Module, layers: Set[Module]): + ret = {} + for name, submodule in net.named_children(): + if submodule in layers: + ret[submodule] = net, name + ret = {**ret, **ModuleIndexer.find_layers_parent_info(submodule, layers)} + return ret + + @property + def all_modules(self) -> Iterator[Module]: + return iter(self.module_to_index.keys()) + + def find(self, module: Module) -> Optional[int]: + return self.module_to_index.get(module, None) + + def __getitem__(self, item: int) -> Module: + return self.index_to_module[item] + + def __setitem__(self, key: int, value: Module): + old = self.index_to_module[key] + if value != old: + self.index_to_module[key] = value + self.module_to_index[value] = self.module_to_index[old] + self.module_to_index.pop(old) + parent, name = self.layer_parents[old] + self.layer_parents[value] = parent, name + self.layer_parents.pop(old) + parent.__setattr__(name, value) + + def __iter__(self) -> Iterator[Module]: + return self.all_modules + + def __len__(self): + return len(self.module_to_index) diff --git a/llvm/projects/pred_tuner/toolkit/transform.py b/llvm/projects/pred_tuner/toolkit/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..f19554181a9bb9ac10ee9261cd908c2003f18d48 --- /dev/null +++ b/llvm/projects/pred_tuner/toolkit/transform.py @@ -0,0 +1,186 @@ +import copy +import logging +from collections import defaultdict +from typing import Callable, Dict, Generic, Iterator, List, Tuple, TypeVar + +from torch.nn import Module + +from .approxdnn import Approximation, AvailableApproximations +from .indexing import ModuleIndexer + +msg_logger = logging.getLogger(__name__) + + +T1 = TypeVar('T1') +T2 = TypeVar('T2') +TransformerCT = Callable[[int, T1], T2] + + +class StateCapturer(Module, Generic[T2]): + @staticmethod + def _id(_, x): + return x.clone().cpu().detach() + + def __init__(self, net_index: ModuleIndexer, state_transformer: TransformerCT = None): + super().__init__() + self.net_state: Dict[int, List[T2]] = defaultdict(list) + self.state_transformer = state_transformer or self._id + self.net_index = net_index + for submodule in net_index.module.modules(): + submodule.register_forward_hook(self.forward_hook) + self._output = None + + @property + def module(self): + return self.net_index.module + + @property + def output(self): + 
if self._output is None: + raise RuntimeError("Cannot get output before inference happens") + return self._output + + def forward_hook(self, module: Module, _, outputs): + module_idx = self.net_index.find(module) + if module_idx is None: + raise RuntimeError("Cannot find module; module may have changed externally") + self.net_state[module_idx].append(self.state_transformer(module_idx, outputs)) + + def forward(self, *args, **kwargs): + return self.module.forward(*args, **kwargs) + + def get_output_state(self) -> List[T2]: + return self.net_state[self.injected.output_loc()] + + +T = TypeVar('T') +ConfigT = Dict[int, int] +EvaluatorT = Callable[[int, int], Tuple[bool, T]] + + +class NetApproxSelector: + r"""List all 1-approximation configurations, and apply configurations to a `ModuleDAG` network. + + Computes a list of available approximations for each layer of the network, given info on available + approximations in the system (in the form of an `AvailableApproximations` instance). + Capable of listing all single-approximation configurations, and apply a given configuration to the network. + A configuration is a dict from layer indices to approximation for these layers, one for each. + See `ConfigT`. + + Parameters + ---------- + net : Module + The network to be approximated. + dev_time_only : bool + If True, use only devtime approximations; otherwise use all available approximations. + aa : AvailableApproximations + A container with information of available approximations, and the type of layer each approximation + applies to, etc. + + Attributes + ---------- + net : Module + The network to be approximated (parameter `net`). + net_approxes: Dict[int, List[int]] + A list of available approximation indexes per layer index. + available_approx: AvailableApproximations + Available approximations (parameter `aa`). 
+ """ + + class ApproximationGraph: + """Naive O(n^2) sort for a list of partially-ordered approximations.""" + + def __init__(self, approx_indices: List[int], aa: AvailableApproximations): + import networkx as nx + self.dep_graph = nx.DiGraph() + self.dep_graph.add_nodes_from(approx_indices) + for i, x in enumerate(approx_indices): + for y in approx_indices[i + 1:]: + approx_x, approx_y = aa[x], aa[y] + cmp = approx_x.is_less_approx(approx_y) + if cmp is None: # Not comparable + continue + if cmp: + self.dep_graph.add_edge(x, y) + else: + self.dep_graph.add_edge(y, x) + self.sorted_indices = list(nx.algorithms.topological_sort(self.dep_graph)) + + def __len__(self) -> int: + return len(self.sorted_indices) + + def __iter__(self) -> Iterator[Tuple[int, bool]]: + return iter(self.sorted_indices) + + def __init__( + self, net: Module, dev_time_only: bool = True, ignore_fp32: bool = False, + aa: AvailableApproximations = None + ): + self.available_approx = aa or AvailableApproximations.from_global_knobs_file() + self.type_approxes = self.available_approx.items(dev_time=dev_time_only, ignore_fp32=ignore_fp32) + approximable_types = tuple(self.type_approxes.keys()) + self.net_index = ModuleIndexer(net, lambda m: not isinstance(m, approximable_types)) + self.dev_time_only = dev_time_only + self.net_approxes: Dict[int, List[int]] = defaultdict(list) + for i, layer in self.net_index.index_to_module.items(): + for t, approxes in self.type_approxes.items(): + if isinstance(layer, t): + self.net_approxes[i].extend(approxes) + + def apply_approx_by_config(self, config: ConfigT) -> ModuleIndexer: + """Applies given `config` to network.""" + new_dag = copy.deepcopy(self.net_index) + for layer_idx, config_idx in config.items(): + layer = new_dag[layer_idx] + new_dag[layer_idx] = self.available_approx[config_idx].apply(layer) + return new_dag + + def list_single_approxes(self) -> Iterator[Tuple[int, int, Approximation]]: + for k, vs in self.net_approxes.items(): + for v in vs: + yield k, v, self.available_approx[v] + + def filter_approxes(self, evaluator: EvaluatorT) -> Iterator[T]: + """Enumerate through and apply each single-approximation configuration.""" + net_approxes_graph: Dict[int, NetApproxSelector.ApproximationGraph] = { + k: self.ApproximationGraph(vs, self.available_approx) for k, vs in self.net_approxes.items() + } + from tqdm import tqdm + from utils import gpu_mem_mb + bar1 = tqdm(net_approxes_graph.items(), total=len(net_approxes_graph)) + for k, graph in bar1: + bar1.set_postfix(layer=k) + bar2 = tqdm(graph, leave=None) + unacceptable_approx = None + filtered_layer_approxes = [] + for approx_id in bar2: + approx = self.available_approx[approx_id] + if unacceptable_approx is not None: + cmp = unacceptable_approx.is_less_approx(approx) + if cmp: + msg_logger.debug(f"{approx} is worse than unacceptable approx {unacceptable_approx}") + continue + else: + unacceptable_approx = None + bar2.set_postfix(approx_id=approx_id, mem=gpu_mem_mb()) + acceptable, ret_val = evaluator(k, approx_id) + if not acceptable: + unacceptable_approx = approx + msg_logger.debug(f"{approx} is unacceptable") + continue + filtered_layer_approxes.append(approx_id) + yield ret_val + self.net_approxes[k] = filtered_layer_approxes + + def get_baseline(self) -> Module: + return self.net_index.module + + def get_layer_approxes(self) -> Dict[Module, List[int]]: + """Expose available knobs for autotuner usage.""" + return { + self.net_index[layer_k]: approxes + for layer_k, approxes in self.net_approxes.items() + } + + def 
is_deterministic(self, config: ConfigT): + return all(self.available_approx[knob_id].deterministic for knob_id in config.values()) diff --git a/llvm/projects/pred_tuner/utils/__init__.py b/llvm/projects/pred_tuner/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1f06b4ae222c3a8a56d4ab4516031e4c91dfa0d2 --- /dev/null +++ b/llvm/projects/pred_tuner/utils/__init__.py @@ -0,0 +1,3 @@ +from .config import Config +from .logging import config_pylogger, reapply_last_config +from .utils import device, get_knob_config_file, get_tensorrt_dir, gpu_mem_mb diff --git a/llvm/projects/pred_tuner/utils/benchmarks.json b/llvm/projects/pred_tuner/utils/benchmarks.json new file mode 100644 index 0000000000000000000000000000000000000000..57184872a07de661c1c9ee4064ec01652e9966ff --- /dev/null +++ b/llvm/projects/pred_tuner/utils/benchmarks.json @@ -0,0 +1,100 @@ +{ + "lenet_hpvm": { + "model_name": "lenet_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/lenet_keras/", + "layer_file": "autotuner/data/lenet/lenet_layers.txt", + "cost_file": "autotuner/data/lenet/op_cost.txt" + }, + "alexnet_hpvm": { + "model_name": "alexnet_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/alexnet_cifar10/", + "layer_file": "autotuner/data/alexnet/alexnet_layers.txt", + "cost_file": "autotuner/data/alexnet/op_cost.txt" + }, + "alexnet2_hpvm": { + "model_name": "alexnet2_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/alexnet2_cifar10/", + "layer_file": "autotuner/data/alexnet2/alexnet2_layers.txt", + "cost_file": "autotuner/data/alexnet2/op_cost.txt" + }, + "vgg16_cifar10_hpvm": { + "model_name": "vgg16_cifar10_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/vgg16_cifar10/", + "layer_file": "autotuner/data/vgg16_cifar10/vgg16_layers.txt", + "cost_file": "autotuner/data/vgg16_cifar10/op_cost.txt" + }, + "vgg16_cifar100_hpvm": { + "model_name": "vgg16_cifar100_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/vgg16_cifar100/", + "layer_file": "autotuner/data/vgg16_cifar100/vgg16_layers.txt", + "cost_file": "autotuner/data/vgg16_cifar100/op_cost.txt" + }, + "vgg16_imagenet_hpvm": { + "model_name": "vgg16_imagenet_hpvm", + "autotuner_runs": 20000, + "base_dir": "tuner_results/vgg16_imagenet/", + "layer_file": "autotuner/data/vgg16_imagenet/vgg16_layers.txt", + "cost_file": "autotuner/data/vgg16_imagenet/op_cost.txt" + }, + "resnet18_hpvm": { + "model_name": "resnet18_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/resnet18_cifar10/", + "layer_file": "autotuner/data/resnet/resnet_layers.txt", + "cost_file": "autotuner/data/resnet/op_cost.txt" + }, + "resnet50_imagenet_hpvm": { + "model_name": "resnet50_imagenet_hpvm", + "autotuner_runs": 30000, + "base_dir": "tuner_results/resnet50_imagenet/", + "layer_file": "autotuner/data/resnet50_imagenet/resnet50_layers.txt", + "cost_file": "autotuner/data/resnet50_imagenet/op_cost.txt" + }, + "mobilenet_hpvm": { + "model_name": "mobilenet_hpvm", + "autotuner_runs": 20000, + "base_dir": "tuner_results/mobilenet/", + "layer_file": "autotuner/data/mobilenet/mobilenet_layer_comp.txt", + "cost_file": "autotuner/data/mobilenet/op_cost.txt" + }, + "__unused_mobilenet_shallow": { + "model_name": "mobilenet_shallow_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/mobilenet_shallow/", + "layer_file": "autotuner/data/mobilenet_shallow/mobilenet_shallow_layer_comp.txt", + "cost_file": "autotuner/data/mobilenet_shallow/op_cost.txt" + }, + "alexnet_imagenet_hpvm": { + 
"model_name": "alexnet_imagenet_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/alexnet_imagenet/", + "layer_file": "autotuner/data/alexnet_imagenet/layer_composition.txt", + "cost_file": "autotuner/data/alexnet_imagenet/op_cost.txt" + }, + "alexnet2_canny_hpvm": { + "model_name": "alexnet2_canny_hpvm", + "autotuner_runs": 10000, + "base_dir": "tuner_results/alexnet2_canny_hpvm/", + "layer_file": "autotuner/data/alexnet2_canny_hpvm/layers.txt", + "cost_file": "autotuner/data/alexnet2_canny_hpvm/op_cost.txt" + }, + "resnet18_torch": { + "model_name": "resnet18_torch", + "autotuner_runs": 10000, + "base_dir": "tuner_results/resnet18_cifar10_torch/", + "layer_file": "autotuner/data/resnet18_torch/resnet_layers.txt", + "cost_file": "autotuner/data/resnet18_torch/op_cost.txt" + }, + "vgg16_torch": { + "model_name": "vgg16_torch", + "autotuner_runs": 10000, + "base_dir": "tuner_results/resnet18_cifar10_torch/", + "layer_file": "autotuner/data/resnet/resnet_layers.txt", + "cost_file": "autotuner/data/resnet/op_cost.txt" + } +} \ No newline at end of file diff --git a/llvm/projects/pred_tuner/utils/config.py b/llvm/projects/pred_tuner/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..fced1a4d462ad9bb4c828f2bbc264bb4b4755081 --- /dev/null +++ b/llvm/projects/pred_tuner/utils/config.py @@ -0,0 +1,318 @@ +from pathlib import Path +from typing import Dict, Iterable, List, Union + +import matplotlib.pyplot as plt +import numpy as np + +from models.domains import QoS +from models.domains.qoses import Accuracy, AccuracyPSNR +from .utils import get_knob_config_file + +op_mapping = { + "conv": "conv", "depthwise_conv": "group_conv", "dense": "mul", "batchnorm": "batchnorm", + "pool": "pool_max", "pool_mean": "pool_mean", "activation": "relu", "tanh": "tanh", "add": "add", + "reduce": "red_samp" +} + +approx_map = {} +PathLike = Union[str, Path] + + +def initializeApproxMap(knobs_file_path): + f = open(knobs_file_path, "r") + + for x in f: + toks = x.split("\t") + approx_type = toks[0].split(",")[0] + knob_id = toks[0].split(",")[1] + approx_str = approx_type + " " + knob_id + approx_map[knob_id] = approx_str + + +initializeApproxMap(get_knob_config_file()) + +# TODO: fix hardcoding +fp32_to_fp16 = { + **{k: k + 30 for k in range(121, 138 + 1)}, + **{k: k + 30 for k in range(231, 248 + 1)}, + 11: 12 +} +fp16_to_fp32 = {v: k for k, v in fp32_to_fp16.items()} + + +class Config: + def __init__( + self, avg_accuracy: QoS, baseline_accuracy: QoS, fname: str, flags: List[int], + total_runs: int, confidence: float, config_cost: float, speedup: float + ): + self.total_runs = total_runs + self.confidence = confidence + self.config_cost = config_cost + self.speedup = speedup + self.avg_qos = avg_accuracy + self.baseline_qos = baseline_accuracy + self.fname = fname + self.flags = flags + self.avg_loss = self.avg_loss.min_positive_loss() + + @property + def avg_loss(self): + return self.baseline_qos - self.avg_qos + + @avg_loss.setter + def avg_loss(self, value: QoS): + self.avg_qos = self.baseline_qos - value + + def __repr__(self): + return repr((self.fname, self.speedup, self.avg_qos, self.avg_loss, self.flags)) + + @staticmethod + def qos_speedup_points(configs: Iterable['Config']) -> np.ndarray: + return np.array([[*conf.avg_qos.numpy(), conf.speedup] for conf in configs]) + + def update_acc(self, acc: QoS, confidence: float, baseline_acc: QoS = None): + if baseline_acc: + self.baseline_qos = baseline_acc + self.avg_qos = acc + self.avg_loss = 
self.avg_loss.min_positive_loss() + self.confidence = confidence + + def to_fp16(self) -> 'Config': + import copy + fp16_conf = copy.copy(self) + fp16_conf.flags = [fp32_to_fp16.get(x, x) for x in self.flags] + return fp16_conf + + def to_fp32(self) -> 'Config': + import copy + fp32_conf = copy.copy(self) + fp32_conf.flags = [fp16_to_fp32.get(x, x) for x in self.flags] + return fp32_conf + + def to_rt_format(self, idx: int, bench_layer_composition, hardware_target: str): + config_str = build_config_str(self.flags, bench_layer_composition, hardware_target) + return ( + "+++++\n" + f"conf{idx} {self.speedup} 0 {self.avg_qos} {self.avg_loss}\n" + f"{config_str}" + "-----\n" + ) + + def to_tuner_format(self): + topline = ( + f"total_runs={self.total_runs}\tconfidence={self.confidence}\t" + f"avg_accuracy={self.avg_qos}\tconfig_cost={self.config_cost}\tspeedup={self.speedup}" + ) + flags_lines = [str(x) for x in self.flags] + return '\n'.join([topline] + flags_lines) + + @classmethod + def from_tuner_format(cls, lines: List[str], fname: str, baseline_accuracy: QoS): + def parseTopLine(x: str) -> Dict[str, str]: + toks = x.split() + fields = {} + for tok in toks: + field, value = tok.split('=') + fields[field] = value + return fields + + top_line = parseTopLine(lines[0]) + total_runs = int(top_line['total_runs']) + confidence = float(top_line['confidence']) + avg_accuracy = baseline_accuracy.parse(top_line['avg_accuracy']) + config_cost = float(top_line['config_cost']) + speedup = float(top_line['speedup']) + flags = [int(line.strip()) for line in lines[1:] if line.strip()] + return cls(avg_accuracy, baseline_accuracy, fname, flags, total_runs, confidence, config_cost, speedup) + + +def genScatterPlotFromConfigs(configs, file_path): + speedups, accuracy_losses = [c.speedup for c in configs], [c.avg_loss for c in configs] + plt.scatter(accuracy_losses, speedups) + plt.xlabel("accuracy_loss") + plt.ylabel("speedup") + plt.xlim(left=-0.05) + plt.ylim(bottom=1) + plt.savefig(file_path) + plt.close() + + +def _find_distance_to(points: np.ndarray, ref_points: np.ndarray) -> np.ndarray: + n_ref = len(ref_points) + if n_ref == 0: + return np.zeros(0) + if n_ref == 1: + return np.linalg.norm(points - ref_points, axis=1) + ref_points = np.array(sorted(ref_points, key=lambda p: p[0])) + px = points.T[0] + rx = ref_points.T[0] + local_unit_vecs = ref_points[1:] - ref_points[:-1] + dists = [] + bins = np.digitize(px, rx) - 1 + for point, left_ref_p in zip(points, bins): + if left_ref_p == -1: + left_ref_p = 0 + to_left_ref = ref_points[left_ref_p] - point + local_unit_vec = local_unit_vecs[-1] if left_ref_p >= n_ref - 1 else local_unit_vecs[left_ref_p] + projection = np.dot(local_unit_vec, to_left_ref) / np.linalg.norm(local_unit_vec) + dist = np.sqrt(np.linalg.norm(to_left_ref) ** 2 - projection ** 2) + dists.append(dist) + return np.array(dists) + + +def is_pareto_efficient( + configs: List[Config], margin: float = None, + ratio: float = None, n_min: int = None, n_max: int = None +) -> List[Config]: + configs = np.array(configs) + acc_speedup = Config.qos_speedup_points(configs) + is_efficient = np.ones(acc_speedup.shape[0], dtype=bool) + for idx, c in enumerate(acc_speedup): + if is_efficient[idx]: + # Keep any point with a higher value + is_efficient[is_efficient] = np.any(acc_speedup[is_efficient] > c, axis=1) + is_efficient[idx] = True # And keep self + pareto_acc_speedup = acc_speedup[is_efficient] + pareto_configs = configs[is_efficient] + non_pareto_acc_speedup = 
acc_speedup[np.logical_not(is_efficient)] + non_pareto_configs = configs[np.logical_not(is_efficient)] + dist_to_pareto = _find_distance_to(non_pareto_acc_speedup, pareto_acc_speedup) + if margin is not None: + marginal_accepted = non_pareto_configs[dist_to_pareto < margin] + elif ratio is not None: + dist_order = np.argsort(dist_to_pareto) + take_n = int(len(dist_to_pareto) * ratio) + if n_min is not None: + take_n = max(take_n, n_min) + if n_max is not None: + take_n = min(take_n, n_max) + take_n -= len(pareto_configs) + marginal_accepted = non_pareto_configs[dist_order[:take_n]] + else: + raise ValueError("Must provide margin or ratio") + return pareto_configs.tolist() + marginal_accepted.tolist() + + +def print_layer_info(flag: int, hardware_target: str, layer_comp): + approx_tech = approx_map[str(flag)] + if flag <= 7: + # If is PROMISE + return f"promise {approx_tech}" + # If is GPU / CPU + op0 = op_mapping[layer_comp[0]] + config_str = f"{hardware_target} {op0} {approx_tech} " + for op in layer_comp[1:]: + op_name = op_mapping[op] + fp = "fp32" if is_fp32(flag) else "fp16" + config_str += f"{op_name} {fp} 1 " + return config_str + + +def build_config_str(flags: List[int], layer_desc: List[List[str]], hardware_target: str): + lines = [] + assert len(flags) == len(layer_desc) + for index, (flag, layer_comp) in enumerate(zip(flags, layer_desc), start=1): + layer_str = print_layer_info(flag, hardware_target, layer_comp) + config_str = f"{index} {layer_str}" + lines.append(config_str) + lines.append(f"{len(layer_desc) + 1} {hardware_target} softmax fp32 1\n") + return '\n'.join(lines) + + +def is_fp32(flag: int): + return flag in fp32_to_fp16 + + +def dump_configs_to_rt( + layer_desc, configs: List[Config], + config_out_path: PathLike, baseline_acc: QoS, hardware_target: str +): + baseline_flag = 11 + baseline_config = Config( + baseline_acc, baseline_acc, '', [baseline_flag for _ in layer_desc], + 1, 100.0, 0.0, 1.0 + ) + baseline_str = baseline_config.to_rt_format(1, layer_desc, hardware_target) + with config_out_path.open("w") as f: + f.write(baseline_str) + for it, config in enumerate(configs, start=2): + f.write(config.to_rt_format(it, layer_desc, hardware_target)) + + +# Public Interfaces +def dump_rt_format_to( + layer_desc, configs: List[Config], gold_acc: QoS, + rt_cpu_path: PathLike = None, rt_gpu_path: PathLike = None +): + if configs: + assert len(set([conf.baseline_qos for conf in configs])) == 1 + # Sort configs + sorted_configs = sorted(configs, key=lambda conf: (conf.avg_loss, conf.speedup, conf.flags)) + if rt_gpu_path is not None: + # Remap to fp16 for gpu. + fp16_configs = [conf.to_fp16() for conf in sorted_configs] + dump_configs_to_rt( + layer_desc, fp16_configs, rt_gpu_path, gold_acc, 'gpu' + ) + if rt_cpu_path is not None: + # Remap to fp32 for cpu. 
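+ # to_fp32() maps each knob id through fp16_to_fp32, i.e. it inverts the fp16 remapping used for the GPU path.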
+ fp32_configs = [conf.to_fp32() for conf in sorted_configs] + dump_configs_to_rt( + layer_desc, fp32_configs, rt_cpu_path, gold_acc, 'cpu' + ) + + +def plot_configs(file_path: Path, **kw_configs: List[Config]): + from mpl_toolkits.mplot3d import Axes3D + # Decide 2D or 3D plot: + qos_type = None + for label, confs in kw_configs.items(): + if not confs: + continue + if not qos_type: + qos_type = type(confs[0].avg_qos) + else: + assert qos_type == type(confs[0].avg_qos) + if qos_type is None: + return + if qos_type is AccuracyPSNR: + fig: plt.Figure = plt.figure() + ax: Axes3D = fig.add_subplot(111, projection='3d') + for label, confs in kw_configs.items(): + data = np.array([ + [c.avg_loss.qoses[0].to_scalar(), c.avg_qos.qoses[1].to_scalar(), c.speedup] + for c in confs] + ) + x, y, z = data.T + ax.scatter(x, y, z, label=label) + ax.set_xlabel("accuracy_loss") + ax.set_ylabel("psnr") + ax.set_zlabel("speedup") + ax.set_xlim(left=-0.05) + ax.set_zlim(bottom=1) + elif qos_type is Accuracy: + fig, ax = plt.subplots() + fig: plt.Figure + ax: plt.Axes + for label, confs in kw_configs.items(): + data = np.array([[c.avg_loss.to_scalar(), c.speedup] for c in confs]) + x, y = data.T + ax.scatter(x, y, label=label) + ax.set_xlabel("accuracy_loss") + ax.set_ylabel("speedup") + ax.set_xlim(left=-0.05) + ax.set_ylim(bottom=1) + else: + raise ValueError(f"QoS type {qos_type} unsupported in plotting.") + ax.legend() + fig.savefig(file_path) + plt.close(fig) + + +def load_configs_from_dir(result_dir: PathLike, baseline_accuracy: QoS): + config_arr = [] + for path in Path(result_dir).glob('*'): + with path.open() as f: + lines = f.readlines() + config_arr.append(Config.from_tuner_format(lines, path.name, baseline_accuracy)) + return config_arr diff --git a/llvm/projects/pred_tuner/utils/logging.py b/llvm/projects/pred_tuner/utils/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..6b6904bd2e0a0683ccc6905994f645fa6856ad4d --- /dev/null +++ b/llvm/projects/pred_tuner/utils/logging.py @@ -0,0 +1,87 @@ +import logging +from logging import config +import os +from pathlib import Path + +import tqdm + + +class TqdmStreamHandler(logging.Handler): + """tqdm-friendly logging handler. Uses tqdm.write instead of print for logging.""" + + def __init__(self, level=logging.NOTSET): + super().__init__(level) + + def emit(self, record): + try: + msg = self.format(record) + tqdm.tqdm.write(msg) + self.flush() + except (KeyboardInterrupt, SystemExit, RecursionError): + raise + except: + self.handleError(record) + + +_last_applied_config = None + + +def config_pylogger(filename: str = None, output_dir: Path = None, verbose: bool = False) -> logging.Logger: + """Configure the Python logger. + + For each execution of the application, we'd like to create a unique log file. + By default this file is named using the date and time of day, so that it can be sorted by recency. + You can also name your filename or choose the log directory. 
+ """ + import time + timestr = time.strftime("%Y.%m.%d-%H%M%S") + filename = filename or timestr + output_dir = output_dir or Path('.') + if not os.path.exists(output_dir): + os.makedirs(output_dir) + file_path = output_dir / filename + + global _last_applied_config + _last_applied_config = d = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'simple': { + 'format': '%(levelname)s %(name)s: ' + '%(message)s' + }, + 'detailed': { + 'format': '[%(asctime)-15s] ' + '%(levelname)7s %(name)s: ' + '%(message)s ' + '@%(filename)s:%(lineno)d' + } + }, + 'handlers': { + 'console': { + '()': TqdmStreamHandler, + 'level': 'INFO', + 'formatter': 'simple' + }, + 'file': { + 'class': 'logging.FileHandler', + 'filename': file_path.as_posix(), + 'mode': 'a', # Because we may apply this config again, want to keep existing content + 'formatter': 'detailed', + }, + }, + 'root': { + 'level': 'DEBUG' if verbose else 'INFO', + 'handlers': ['console', 'file'] + }, + } + config.dictConfig(d) + + msglogger = logging.getLogger() + msglogger.info(f"Log file for this run: {file_path}") + return msglogger + + +def reapply_last_config(): + if _last_applied_config is not None: + config.dictConfig(_last_applied_config) diff --git a/llvm/projects/pred_tuner/utils/utils.py b/llvm/projects/pred_tuner/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..16165574662ca91320784f827468002fbae21fa8 --- /dev/null +++ b/llvm/projects/pred_tuner/utils/utils.py @@ -0,0 +1,26 @@ +import logging +import os +from pathlib import Path + +import torch + +device = f'cuda:{torch.cuda.device_count() - 1}' if torch.cuda.is_available() else 'cpu' +n_cpu_threads = 12 if device == 'cuda:0' else 35 +torch.set_num_threads(n_cpu_threads) + +msg_logger = logging.getLogger(__name__) + + +def gpu_mem_mb(): + # noinspection PyTypeChecker + return torch.cuda.memory_allocated(device) / 1024 ** 2 + + +def get_tensorrt_dir() -> Path: + if 'LLVM_SRC_ROOT' not in os.environ: + return Path('.') + return Path(os.environ['LLVM_SRC_ROOT']) / "projects/hpvm-tensor-rt" + + +def get_knob_config_file() -> Path: + return get_tensorrt_dir() / "autotuner/data/global_knobs.txt"