From 07b1f2643bd98cebc78b90dfa33bd4d195776a86 Mon Sep 17 00:00:00 2001 From: Elizabeth <hashim.sharif91@gmail.com> Date: Sun, 17 Nov 2019 17:50:21 -0600 Subject: [PATCH] Fixsed tanh/poolmax order for fp32 baseline --- .../tuner_pareto_confs_batch220.txt | 682 +++++++++--------- 1 file changed, 345 insertions(+), 337 deletions(-) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt index 2e3185632c..707fd70be0 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt @@ -1,896 +1,904 @@ +++++ +conf1 1 0 99.69 0 +1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu mul fp32 1 add fp32 1 tanh fp32 1 +4 gpu mul fp32 1 add fp32 1 tanh fp32 1 +5 gpu softmax fp32 1 +----- ++++++ conf1 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv perf 21 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp16 1 add fp32 1 tanh fp32 1 -4 gpu mul fp16 1 add fp32 1 tanh fp32 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu mul fp16 1 add fp16 1 tanh fp16 1 +4 gpu mul fp16 1 add fp16 1 tanh fp16 1 5 gpu softmax fp32 1 ----- +++++ conf2 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf3 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf4 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf5 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf6 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf7 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf8 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf9 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf10 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf11 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf12 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf13 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf14 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf15 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf16 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf17 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf18 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf19 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf20 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf21 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf22 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf23 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf24 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf25 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf26 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf27 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf28 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf29 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf30 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf31 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf32 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf33 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf34 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf35 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf36 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf37 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf38 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf39 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf40 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf41 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf42 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf43 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf44 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf45 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf46 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf47 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf48 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf49 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf50 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf51 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf52 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf53 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf54 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf55 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf56 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf57 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf58 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf59 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf60 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf61 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf62 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf63 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf64 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf65 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf66 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf67 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf68 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf69 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf70 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf71 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf72 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf73 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf74 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf75 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf76 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf77 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf78 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf79 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf80 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf81 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf82 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf83 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf84 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf85 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf86 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf87 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf88 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf89 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf90 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf91 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf92 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf93 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf94 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf95 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf96 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf97 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf98 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf99 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf100 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf101 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf102 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf103 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf104 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf105 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf106 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf107 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf108 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf109 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf110 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf111 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- +++++ conf112 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 +1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 3 gpu mul fp16 1 add fp16 1 tanh fp16 1 4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 +5 gpu softmax fp32 1 ----- -- GitLab