diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt index 948efe5bd7586727c5fe4fa7ccc73e7319bf97d6..4a14a5f2e45c83a2960deccbcd0296a6d9a2f2bc 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt @@ -1,87 +1,87 @@ +++++ conf1 1 0 83.5 0 1 gpu conv fp32 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 7 gpu conv fp32 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 13 gpu conv fp32 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 19 gpu conv fp32 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 25 gpu conv fp32 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 31 gpu conv fp32 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 37 gpu conv fp32 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 43 gpu conv fp32 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 49 gpu conv fp32 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 55 gpu conv fp32 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 61 gpu conv fp32 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 67 gpu conv fp32 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 73 gpu conv fp32 1 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 79 gpu conv fp32 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 83 gpu mul fp32 1 add fp32 1 84 gpu softmax fp32 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt index 66833d06b3af9ad7c4bcefdbea9c2e977eeea378..86b061f3d9ff5b75a9580ae65afd9ff6c20f9701 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -1,87 +1,87 @@ +++++ conf1 1 0 83.5 0 1 gpu conv fp32 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 7 gpu conv fp32 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 13 gpu conv fp32 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 19 gpu conv fp32 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 25 gpu conv fp32 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 31 gpu conv fp32 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 37 gpu conv fp32 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 43 gpu conv fp32 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 49 gpu conv fp32 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 55 gpu conv fp32 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 61 gpu conv fp32 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 67 gpu conv fp32 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 73 gpu conv fp32 1 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 79 gpu conv fp32 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 83 gpu mul fp32 1 add fp32 1 84 gpu softmax fp32 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt index baffc185452ce288432fa55e3d8ad7ced9ff44d2..3b628d570fcb1884cfa10371a2aaf6856a652d1e 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt @@ -1,88 +1,88 @@ +++++ -conf1 4.15413017186 0 83.163334475 0.5049982875000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 +conf1 1 0 83.5 0 +1 gpu conv fp32 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 +7 gpu conv fp32 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 +13 gpu conv fp32 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 +19 gpu conv fp32 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 +25 gpu conv fp32 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 +31 gpu conv fp32 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 +37 gpu conv fp32 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 +43 gpu conv fp32 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 +49 gpu conv fp32 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 +55 gpu conv fp32 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 +61 gpu conv fp32 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 +67 gpu conv fp32 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 +73 gpu conv fp32 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 +79 gpu conv fp32 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 +83 gpu mul fp32 1 add fp32 1 84 gpu softmax fp32 1 ----- +++++ diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt index fa5a561bf6fd17c4b2ce372884ac02524ce135f5..ff7fdbf108c1cbca0154d6c300cd3ebbdaf7cd6d 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt @@ -1,87 +1,87 @@ +++++ conf1 1 0 83.5 0 1 gpu conv fp32 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 7 gpu conv fp32 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 13 gpu conv fp32 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 19 gpu conv fp32 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 25 gpu conv fp32 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 31 gpu conv fp32 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 37 gpu conv fp32 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 43 gpu conv fp32 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 49 gpu conv fp32 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 55 gpu conv fp32 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 61 gpu conv fp32 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 67 gpu conv fp32 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 73 gpu conv fp32 1 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 79 gpu conv fp32 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 83 gpu mul fp32 1 add fp32 1 84 gpu softmax fp32 1 ----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt index bf55690f22ad5f9a3de72bd16d4907d8099512a9..04d1491bc7ddcfd94ce837cc830fa0874496842e 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_single.txt @@ -1,87 +1,87 @@ +++++ conf1 1 0 83.5 0 1 gpu conv fp32 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 +2 gpu batchnorm fp32 1 +3 gpu relu fp32 1 +4 gpu group_conv fp32 1 +5 gpu batchnorm fp32 1 +6 gpu relu fp32 1 7 gpu conv fp32 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 +8 gpu batchnorm fp32 1 +9 gpu relu fp32 1 +10 gpu group_conv fp32 1 +11 gpu batchnorm fp32 1 +12 gpu relu fp32 1 13 gpu conv fp32 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 +14 gpu batchnorm fp32 1 +15 gpu relu fp32 1 +16 gpu group_conv fp32 1 +17 gpu batchnorm fp32 1 +18 gpu relu fp32 1 19 gpu conv fp32 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 +20 gpu batchnorm fp32 1 +21 gpu relu fp32 1 +22 gpu group_conv fp32 1 +23 gpu batchnorm fp32 1 +24 gpu relu fp32 1 25 gpu conv fp32 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 +26 gpu batchnorm fp32 1 +27 gpu relu fp32 1 +28 gpu group_conv fp32 1 +29 gpu batchnorm fp32 1 +30 gpu relu fp32 1 31 gpu conv fp32 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 +32 gpu batchnorm fp32 1 +33 gpu relu fp32 1 +34 gpu group_conv fp32 1 +35 gpu batchnorm fp32 1 +36 gpu relu fp32 1 37 gpu conv fp32 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 +38 gpu batchnorm fp32 1 +39 gpu relu fp32 1 +40 gpu group_conv fp32 1 +41 gpu batchnorm fp32 1 +42 gpu relu fp32 1 43 gpu conv fp32 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 +44 gpu batchnorm fp32 1 +45 gpu relu fp32 1 +46 gpu group_conv fp32 1 +47 gpu batchnorm fp32 1 +48 gpu relu fp32 1 49 gpu conv fp32 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 +50 gpu batchnorm fp32 1 +51 gpu relu fp32 1 +52 gpu group_conv fp32 1 +53 gpu batchnorm fp32 1 +54 gpu relu fp32 1 55 gpu conv fp32 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 +56 gpu batchnorm fp32 1 +57 gpu relu fp32 1 +58 gpu group_conv fp32 1 +59 gpu batchnorm fp32 1 +60 gpu relu fp32 1 61 gpu conv fp32 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 +62 gpu batchnorm fp32 1 +63 gpu relu fp32 1 +64 gpu group_conv fp32 1 +65 gpu batchnorm fp32 1 +66 gpu relu fp32 1 67 gpu conv fp32 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 +68 gpu batchnorm fp32 1 +69 gpu relu fp32 1 +70 gpu group_conv fp32 1 +71 gpu batchnorm fp32 1 +72 gpu relu fp32 1 73 gpu conv fp32 1 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 +74 gpu batchnorm fp32 1 +75 gpu relu fp32 1 +76 gpu group_conv fp32 1 +77 gpu batchnorm fp32 1 +78 gpu relu fp32 1 79 gpu conv fp32 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 +80 gpu batchnorm fp32 1 +81 gpu relu fp32 1 +82 gpu pool_mean fp32 1 83 gpu mul fp32 1 add fp32 1 84 gpu softmax fp32 1 -----